mirror of
https://github.com/hyperknot/openfreemap.git
synced 2026-05-21 14:02:15 +00:00
work
This commit is contained in:
134
docs/extract.md
134
docs/extract.md
@@ -1,4 +1,9 @@
|
|||||||
### native mapbox/mbutil
|
# Comparing filesystem stats after extraction
|
||||||
|
|
||||||
|
Run: *planet_20231208*
|
||||||
|
|
||||||
|
|
||||||
|
## native mapbox/mbutil
|
||||||
|
|
||||||
Filesystem 1K-blocks Used Available Use% Mounted on
|
Filesystem 1K-blocks Used Available Use% Mounted on
|
||||||
/dev/loop0 1,474,386,100 1,119,622,516 354,763,584 76%
|
/dev/loop0 1,474,386,100 1,119,622,516 354,763,584 76%
|
||||||
@@ -7,98 +12,103 @@ Filesystem Inodes IUsed IFree IUse% Mounted on
|
|||||||
/dev/loop0 393,216,000 269,252,174 123,963,826 69%
|
/dev/loop0 393,216,000 269,252,174 123,963,826 69%
|
||||||
|
|
||||||
|
|
||||||
|
## deduplicated on ext4
|
||||||
### extract dedupl ext4
|
|
||||||
|
|
||||||
39,570,683 dedupl files
|
39,570,683 dedupl files
|
||||||
|
|
||||||
df -h mnt
|
df -h mnt_rw
|
||||||
Filesystem Size Used Avail Use% Mounted on
|
Filesystem Size Used Avail Use% Mounted on
|
||||||
/dev/loop0 1.4T 187G 1.2T 14%
|
/dev/loop0 1.4T 187G 1.2T 14%
|
||||||
|
|
||||||
df mnt
|
df mnt_rw
|
||||||
Filesystem 1K-blocks Used Available Use% Mounted on
|
Filesystem 1K-blocks Used Available Use% Mounted on
|
||||||
/dev/loop0 1474386100 195624664 1278761436 14%
|
/dev/loop0 1474386100 195624664 1278761436 14%
|
||||||
|
|
||||||
df -i mnt
|
df -i mnt_rw
|
||||||
Filesystem Inodes IUsed IFree IUse% Mounted on
|
Filesystem Inodes IUsed IFree IUse% Mounted on
|
||||||
/dev/loop0 393216000 39614466 353601534 11%
|
/dev/loop0 393216000 39614466 353601534 11%
|
||||||
|
|
||||||
--- after resize2fs ext4
|
|
||||||
|
|
||||||
df -h mnt
|
## deduplicated on BTRFS
|
||||||
|
|
||||||
|
### creation params
|
||||||
|
|
||||||
|
```
|
||||||
|
mkfs.btrfs -m single
|
||||||
|
mount -o noacl,nobarrier,noatime,max_inline=4096
|
||||||
|
```
|
||||||
|
|
||||||
|
### df
|
||||||
|
|
||||||
|
```
|
||||||
|
df -h mnt_rw
|
||||||
Filesystem Size Used Avail Use% Mounted on
|
Filesystem Size Used Avail Use% Mounted on
|
||||||
/dev/loop0 189G 187G 2.4G 99%
|
/dev/loop3 300G 139G 161G 47%
|
||||||
|
```
|
||||||
|
|
||||||
df mnt
|
```
|
||||||
|
df mnt_rw
|
||||||
Filesystem 1K-blocks Used Available Use% Mounted on
|
Filesystem 1K-blocks Used Available Use% Mounted on
|
||||||
/dev/loop0 198098376 195624664 2473712 99%
|
/dev/loop3 314572800 145030788 168339820 47%
|
||||||
|
```
|
||||||
|
|
||||||
df -i mnt
|
### btrfs
|
||||||
Filesystem Inodes IUsed IFree IUse% Mounted on
|
|
||||||
/dev/loop0 52854784 39614466 13240318 75%
|
|
||||||
|
|
||||||
|
```
|
||||||
### extract dedupl btrfs
|
btrfs filesystem df mnt_rw
|
||||||
note: this test uses compress-force=lzo, but the data is actually incompressible since the PBF files are already gzipped
|
|
||||||
|
|
||||||
|
|
||||||
df -h mnt
|
|
||||||
Filesystem Size Used Avail Use% Mounted on
|
|
||||||
/dev/loop0 300G 97G 204G 33%
|
|
||||||
|
|
||||||
df mnt
|
|
||||||
Filesystem 1K-blocks Used Available Use% Mounted on
|
|
||||||
/dev/loop0 314572800 100925972 213428604 33%
|
|
||||||
|
|
||||||
btrfs filesystem df mnt
|
|
||||||
Data, single: total=48.01GiB, used=47.45GiB
|
Data, single: total=48.01GiB, used=47.45GiB
|
||||||
System, single: total=4.00MiB, used=16.00KiB
|
System, single: total=4.00MiB, used=16.00KiB
|
||||||
Metadata, single: total=49.01GiB, used=48.32GiB
|
Metadata, single: total=92.01GiB, used=90.37GiB
|
||||||
GlobalReserve, single: total=496.86MiB, used=0.00B
|
GlobalReserve, single: total=512.00MiB, used=0.00B
|
||||||
|
```
|
||||||
|
|
||||||
btrfs filesystem du -s mnt
|
|
||||||
|
```
|
||||||
|
btrfs filesystem du -s mnt_rw
|
||||||
Total Exclusive Set shared Filename
|
Total Exclusive Set shared Filename
|
||||||
47.45GiB 47.45GiB 0.00B mnt
|
47.45GiB 47.45GiB 0.00B mnt_rw
|
||||||
|
```
|
||||||
|
```
|
||||||
|
btrfs filesystem show mnt_rw
|
||||||
|
Total devices 1 FS bytes used 137.81GiB
|
||||||
|
devid 1 size 300.00GiB used 140.02GiB path /dev/loop3
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
sudo btrfs filesystem show mnt
|
btrfs filesystem usage mnt_rw
|
||||||
Label: none uuid: ce7615d1-0ee5-460b-bdb0-7c4d214eecc4
|
|
||||||
Total devices 1 FS bytes used 95.76GiB
|
|
||||||
devid 1 size 300.00GiB used 97.02GiB path /dev/loop0
|
|
||||||
|
|
||||||
sudo btrfs filesystem usage mnt
|
|
||||||
Overall:
|
Overall:
|
||||||
Device size: 300.00GiB
|
Device size: 300.00GiB
|
||||||
Device allocated: 97.02GiB
|
Device allocated: 140.02GiB
|
||||||
Device unallocated: 202.98GiB
|
Device unallocated: 159.98GiB
|
||||||
Device missing: 0.00B
|
Device missing: 0.00B
|
||||||
Used: 95.76GiB
|
Used: 137.81GiB
|
||||||
Free (estimated): 203.54GiB (min: 203.54GiB)
|
Free (estimated): 160.54GiB (min: 160.54GiB)
|
||||||
Free (statfs, df): 203.54GiB
|
Free (statfs, df): 160.54GiB
|
||||||
Data ratio: 1.00
|
Data ratio: 1.00
|
||||||
Metadata ratio: 1.00
|
Metadata ratio: 1.00
|
||||||
Global reserve: 501.22MiB (used: 0.00B)
|
Global reserve: 512.00MiB (used: 0.00B)
|
||||||
Multiple profiles: no
|
Multiple profiles: no
|
||||||
|
|
||||||
Data,single: Size:48.01GiB, Used:47.45GiB (98.83%)
|
Data,single: Size:48.01GiB, Used:47.45GiB (98.83%)
|
||||||
/dev/loop0 48.01GiB
|
/dev/loop3 48.01GiB
|
||||||
|
|
||||||
Metadata,single: Size:49.01GiB, Used:48.32GiB (98.60%)
|
Metadata,single: Size:92.01GiB, Used:90.37GiB (98.22%)
|
||||||
/dev/loop0 49.01GiB
|
/dev/loop3 92.01GiB
|
||||||
|
|
||||||
System,single: Size:4.00MiB, Used:16.00KiB (0.39%)
|
System,single: Size:4.00MiB, Used:16.00KiB (0.39%)
|
||||||
/dev/loop0 4.00MiB
|
/dev/loop3 4.00MiB
|
||||||
|
|
||||||
Unallocated:
|
Unallocated:
|
||||||
/dev/loop0 202.98GiB
|
/dev/loop3 159.98GiB
|
||||||
|
```
|
||||||
|
|
||||||
|
### compsize
|
||||||
|
|
||||||
|
|
||||||
|
```
|
||||||
compsize -x mnt
|
compsize -x mnt_rw
|
||||||
Processed 44249086 files, 3458702 regular extents (3800454 refs), 40448654 inline.
|
Processed 308790063 files, 3458682 regular extents (6917363 refs), 301872700 inline.
|
||||||
Type Perc Disk Usage Uncompressed Referenced
|
Type Perc Disk Usage Uncompressed Referenced
|
||||||
TOTAL 99% 74G 74G 80G
|
TOTAL 100% 118G 118G 165G
|
||||||
none 100% 74G 74G 80G
|
none 100% 118G 118G 165G
|
||||||
lzo 20% 4.0K 20K 20K
|
```
|
||||||
|
|
||||||
5
ideas-filesystems.txt
Normal file
5
ideas-filesystems.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
ext4
|
||||||
|
|
||||||
|
Reduce the inode size: 256 -> -I 128
|
||||||
|
|
||||||
|
Increase the inode ratio (fewer inodes): The inode ratio is the number of bytes of disk space per inode (bytes-per-inode). You can increase the inode ratio to reduce the number of inodes created. This can be done with the -i option when creating the filesystem. For example, -i 8192 will create one inode for every 8192 bytes of space on the disk.
|
||||||
1
scripts/benchmark/.gitignore
vendored
1
scripts/benchmark/.gitignore
vendored
@@ -1 +0,0 @@
|
|||||||
*.txt
|
|
||||||
4
scripts/benchmark/command.txt
Normal file
4
scripts/benchmark/command.txt
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
wrk -c1000 -d10s -t1 -s /data/ofm/benchmark/wrk_custom_list.lua http://localhost
|
||||||
|
|
||||||
|
# -t1 - needs to be single-threaded, otherwise the URLs would not be read in sequence
|
||||||
|
|
||||||
@@ -22,20 +22,20 @@ def cli(mbtiles_path: Path, dir_path: Path):
|
|||||||
used for reference: https://github.com/mapbox/mbutil
|
used for reference: https://github.com/mapbox/mbutil
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if dir_path.exists() and any(dir_path.iterdir()):
|
# if dir_path.exists() and any(dir_path.iterdir()):
|
||||||
sys.exit('Dir not empty')
|
# sys.exit('Dir not empty')
|
||||||
|
|
||||||
dir_path.mkdir(exist_ok=True)
|
dir_path.mkdir(exist_ok=True)
|
||||||
|
|
||||||
conn = sqlite3.connect(mbtiles_path)
|
conn = sqlite3.connect(mbtiles_path)
|
||||||
c = conn.cursor()
|
c = conn.cursor()
|
||||||
|
|
||||||
write_metadata(c, dir_path=dir_path)
|
# write_metadata(c, dir_path=dir_path)
|
||||||
write_dedupl_files(c, dir_path=dir_path)
|
# write_dedupl_files(c, dir_path=dir_path)
|
||||||
write_tile_file(c, dir_path=dir_path)
|
write_tile_files(c, dir_path=dir_path)
|
||||||
|
|
||||||
# remove dedupl files at the end
|
# remove dedupl files at the end
|
||||||
shutil.rmtree(dir_path / 'dedupl')
|
# shutil.rmtree(dir_path / 'dedupl')
|
||||||
|
|
||||||
|
|
||||||
def write_metadata(c, *, dir_path):
|
def write_metadata(c, *, dir_path):
|
||||||
@@ -49,6 +49,7 @@ def write_dedupl_files(c, *, dir_path):
|
|||||||
total = c.execute('select count(*) from tiles_data').fetchone()[0]
|
total = c.execute('select count(*) from tiles_data').fetchone()[0]
|
||||||
|
|
||||||
c.execute('select tile_data_id, tile_data from tiles_data')
|
c.execute('select tile_data_id, tile_data from tiles_data')
|
||||||
|
|
||||||
for i, row in enumerate(c, start=1):
|
for i, row in enumerate(c, start=1):
|
||||||
dedupl_id = row[0]
|
dedupl_id = row[0]
|
||||||
dedupl_path = dir_path / 'dedupl' / dedupl_helper_path(dedupl_id)
|
dedupl_path = dir_path / 'dedupl' / dedupl_helper_path(dedupl_id)
|
||||||
@@ -58,24 +59,53 @@ def write_dedupl_files(c, *, dir_path):
|
|||||||
print(f'written dedupl file {i}/{total}')
|
print(f'written dedupl file {i}/{total}')
|
||||||
|
|
||||||
|
|
||||||
def write_tile_file(c, *, dir_path):
|
def write_tile_files(c, *, dir_path):
|
||||||
total = c.execute('select count(*) from tiles_shallow').fetchone()[0]
|
total = c.execute('select count(*) from tiles_shallow').fetchone()[0]
|
||||||
|
|
||||||
|
bug_fix_dict = {}
|
||||||
|
|
||||||
c.execute('select zoom_level, tile_column, tile_row, tile_data_id from tiles_shallow')
|
c.execute('select zoom_level, tile_column, tile_row, tile_data_id from tiles_shallow')
|
||||||
for i, row in enumerate(c, start=1):
|
for i, row in enumerate(c, start=1):
|
||||||
|
if i < 4678400:
|
||||||
|
continue
|
||||||
|
|
||||||
z = row[0]
|
z = row[0]
|
||||||
x = row[1]
|
x = row[1]
|
||||||
y = flip_y(z, row[2])
|
y = flip_y(z, row[2])
|
||||||
dedupl_id = row[3]
|
dedupl_id = row[3]
|
||||||
|
|
||||||
dedupl_path = dir_path / 'dedupl' / dedupl_helper_path(dedupl_id)
|
dedupl_path = dir_path / 'dedupl' / dedupl_helper_path(dedupl_id)
|
||||||
|
dedupl_path_fixed = get_fixed_dedupl_name(bug_fix_dict, dedupl_path)
|
||||||
|
|
||||||
tile_path = dir_path / 'tiles' / str(z) / str(x) / f'{y}.pbf'
|
tile_path = dir_path / 'tiles' / str(z) / str(x) / f'{y}.pbf'
|
||||||
tile_path.parent.mkdir(parents=True, exist_ok=True)
|
tile_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
if tile_path.is_file():
|
||||||
|
continue
|
||||||
|
|
||||||
# create the hard link
|
# create the hard link
|
||||||
tile_path.hardlink_to(dedupl_path)
|
try:
|
||||||
print(f'hard link created {i}/{total}: {tile_path}')
|
tile_path.hardlink_to(dedupl_path_fixed)
|
||||||
|
print(f'hard link created {i}/{total} {i / total * 100:.1f}%: {tile_path}')
|
||||||
|
except OSError as e:
|
||||||
|
# fixing BTRFS's 64k max link limit
|
||||||
|
if e.errno == 31:
|
||||||
|
bug_fix_dict.setdefault(dedupl_path, 0)
|
||||||
|
bug_fix_dict[dedupl_path] += 1
|
||||||
|
fixed_path = get_fixed_dedupl_name(bug_fix_dict, dedupl_path)
|
||||||
|
shutil.copyfile(dedupl_path, fixed_path)
|
||||||
|
print(f'Created fixed dedupl file: {fixed_path}')
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
|
# last file: 14/16383/0.pbf
|
||||||
|
|
||||||
|
|
||||||
|
def get_fixed_dedupl_name(bug_fix_dict, dedupl_path):
|
||||||
|
if dedupl_path in bug_fix_dict:
|
||||||
|
return dedupl_path.with_name(f'{dedupl_path.name}-{bug_fix_dict[dedupl_path]}')
|
||||||
|
else:
|
||||||
|
return dedupl_path
|
||||||
|
|
||||||
|
|
||||||
def dedupl_helper_path(dedupl_id: int) -> Path:
|
def dedupl_helper_path(dedupl_id: int) -> Path:
|
||||||
|
|||||||
@@ -25,12 +25,13 @@ sudo mount -v \
|
|||||||
|
|
||||||
sudo chown ofm:ofm -R mnt_rw
|
sudo chown ofm:ofm -R mnt_rw
|
||||||
|
|
||||||
../../tile_gen/venv/bin/python ../../tile_gen/extract.py output.mbtiles mnt_rw/extract \
|
../../tile_gen/venv/bin/python ../../tile_gen/extract_mbtiles.py output.mbtiles mnt_rw/extract \
|
||||||
> "extract_out.log" 2> "extract_err.log"
|
> "extract_out.log" 2> "extract_err.log"
|
||||||
|
|
||||||
sudo umount mnt_rw
|
sudo umount mnt_rw
|
||||||
|
|
||||||
../../tile_gen/venv/bin/python ../../tile_gen/shrink_btrfs.py image.btrfs
|
sudo ../../tile_gen/venv/bin/python ../../tile_gen/shrink_btrfs.py image.btrfs \
|
||||||
|
> "shrink_out.log" 2> "shrink_err.log"
|
||||||
|
|
||||||
# pigz -k image.btrfs --fast
|
# pigz -k image.btrfs --fast
|
||||||
|
|
||||||
|
|||||||
@@ -11,6 +11,7 @@
|
|||||||
# from /etc/mke2fs.conf
|
# from /etc/mke2fs.conf
|
||||||
# defaults: has_journal,extent,huge_file,flex_bg,metadata_csum,64bit,dir_nlink,extra_isize
|
# defaults: has_journal,extent,huge_file,flex_bg,metadata_csum,64bit,dir_nlink,extra_isize
|
||||||
# disabling journalling, since it's a read-only fs, as well as other unused features
|
# disabling journalling, since it's a read-only fs, as well as other unused features
|
||||||
|
# extent is actually needed for tail packing small files
|
||||||
#
|
#
|
||||||
# -E extended-options
|
# -E extended-options
|
||||||
# lazy_itable_init - inode table is fully initialized at the time of file system creation
|
# lazy_itable_init - inode table is fully initialized at the time of file system creation
|
||||||
@@ -32,9 +33,10 @@ fallocate -l 300G image.ext4
|
|||||||
mke2fs -t ext4 -v \
|
mke2fs -t ext4 -v \
|
||||||
-m 0 \
|
-m 0 \
|
||||||
-F \
|
-F \
|
||||||
-O ^has_journal,^extent,^huge_file,^metadata_csum,^64bit,^extra_isize \
|
-O ^has_journal,^huge_file,^metadata_csum,^64bit,^extra_isize \
|
||||||
-E lazy_itable_init=0,nodiscard \
|
-E lazy_itable_init=0,nodiscard \
|
||||||
-T small \
|
-T small \
|
||||||
|
-I 128 \
|
||||||
image.ext4
|
image.ext4
|
||||||
|
|
||||||
mkdir mnt
|
mkdir mnt
|
||||||
|
|||||||
Reference in New Issue
Block a user