mirror of
https://github.com/hyperknot/openfreemap.git
synced 2026-05-21 14:02:15 +00:00
extract hard links
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,6 +1,10 @@
|
|||||||
*.pyc
|
*.pyc
|
||||||
*.egg-info
|
*.egg-info
|
||||||
|
|
||||||
|
*.sqlite
|
||||||
|
*.mbtiles
|
||||||
|
*.pbf
|
||||||
|
|
||||||
.env
|
.env
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
|
||||||
|
|||||||
14
docs/extract.md
Normal file
14
docs/extract.md
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
### native mapbox/mbutil
|
||||||
|
|
||||||
|
Filesystem 1K-blocks Used Available Use% Mounted on
|
||||||
|
/dev/loop0 1,474,386,100 1,119,622,516 354,763,584 76% /data/ofm/runs/planet_20231208_091355/mnt
|
||||||
|
|
||||||
|
Filesystem Inodes IUsed IFree IUse% Mounted on
|
||||||
|
/dev/loop0 393,216,000 269,252,174 123,963,826 69% /data/ofm/runs/planet_20231208_091355/mnt
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### extract dedupl
|
||||||
|
|
||||||
|
39,570,683 dedupl files
|
||||||
|
|
||||||
20
scripts/mbtiles_extractor/ext/metadata.json
Normal file
20
scripts/mbtiles_extractor/ext/metadata.json
Normal file
File diff suppressed because one or more lines are too long
98
scripts/mbtiles_extractor/extract.py
Executable file
98
scripts/mbtiles_extractor/extract.py
Executable file
@@ -0,0 +1,98 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import json
|
||||||
|
import shutil
|
||||||
|
import sqlite3
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import click
|
||||||
|
|
||||||
|
|
||||||
|
@click.command()
@click.argument(
    'mbtiles_path',
    type=click.Path(exists=True, dir_okay=False, file_okay=True, path_type=Path),
)
@click.argument('dir_path', type=click.Path(dir_okay=True, file_okay=False, path_type=Path))
def cli(mbtiles_path: Path, dir_path: Path):
    """
    Extracts a mbtiles sqlite to a folder
    Deduplicating identical tiles as hard-links

    used for reference: https://github.com/mapbox/mbutil
    """
    # NOTE: the docstring above is shown by click as the command help text,
    # so it is kept verbatim.

    # refuse to extract into a directory that already has content
    if dir_path.exists() and any(dir_path.iterdir()):
        sys.exit('Dir not empty')

    # parents=True so a nested target path can be created in one go
    dir_path.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(mbtiles_path)
    try:
        c = conn.cursor()

        write_metadata(c, dir_path=dir_path)
        write_dedupl_files(c, dir_path=dir_path)
        write_tile_file(c, dir_path=dir_path)
    finally:
        # always release the sqlite handle, even when extraction fails
        conn.close()

    # remove dedupl files at the end; the tiles are hard links to the same
    # data, so they stay valid after the dedupl names are removed.
    # The dir does not exist when tiles_data had no rows.
    dedupl_dir = dir_path / 'dedupl'
    if dedupl_dir.exists():
        shutil.rmtree(dedupl_dir)
|
||||||
|
|
||||||
|
|
||||||
|
def write_metadata(c, *, dir_path):
    """
    Dumps the name/value rows of the metadata table to dir_path/metadata.json

    c: sqlite cursor on the mbtiles database
    dir_path: existing output directory (Path)
    """
    metadata = dict(c.execute('select name, value from metadata').fetchall())

    # context manager so the file is flushed and closed deterministically
    with open(dir_path / 'metadata.json', 'w') as fp:
        json.dump(metadata, fp, indent=2)
|
||||||
|
|
||||||
|
|
||||||
|
def write_dedupl_files(c, *, dir_path):
    """
    Writes every tiles_data row to its own file under dir_path/dedupl

    The files are laid out in the multi-level folder structure returned by
    dedupl_helper_path. Progress is printed for each file written.

    c: sqlite cursor on the mbtiles database
    dir_path: output directory (Path)
    """
    total = c.execute('select count(*) from tiles_data').fetchone()[0]

    # many files share a parent dir, so only mkdir each dir once
    # instead of once per file
    created_dirs = set()

    c.execute('select tile_data_id, tile_data from tiles_data')
    for i, (dedupl_id, tile_data) in enumerate(c, start=1):
        dedupl_path = dir_path / 'dedupl' / dedupl_helper_path(dedupl_id)

        parent = dedupl_path.parent
        if parent not in created_dirs:
            parent.mkdir(parents=True, exist_ok=True)
            created_dirs.add(parent)

        with open(dedupl_path, 'wb') as fp:
            fp.write(tile_data)

        print(f'written dedupl file {i}/{total}: {dedupl_id}')
|
||||||
|
|
||||||
|
|
||||||
|
def write_tile_file(c, *, dir_path):
    """
    Creates dir_path/tiles/{z}/{x}/{y}.pbf as hard links to the dedupl files

    Iterates the tiles_shallow rows; the stored tile_row is passed through
    flip_y before being used as the file name. The dedupl files under
    dir_path/dedupl must already exist. Progress is printed for each link.

    c: sqlite cursor on the mbtiles database
    dir_path: output directory (Path)
    """
    total = c.execute('select count(*) from tiles_shallow').fetchone()[0]

    # many tiles share a z/x dir, so only mkdir each dir once
    # instead of once per tile
    created_dirs = set()

    c.execute('select zoom_level, tile_column, tile_row, tile_data_id from tiles_shallow')
    for i, (z, x, tile_row, dedupl_id) in enumerate(c, start=1):
        y = flip_y(z, tile_row)

        dedupl_path = dir_path / 'dedupl' / dedupl_helper_path(dedupl_id)

        tile_path = dir_path / 'tiles' / str(z) / str(x) / f'{y}.pbf'

        parent = tile_path.parent
        if parent not in created_dirs:
            parent.mkdir(parents=True, exist_ok=True)
            created_dirs.add(parent)

        # create the hard link
        tile_path.hardlink_to(dedupl_path)
        print(f'hard link created {i}/{total}: {tile_path}')
|
||||||
|
|
||||||
|
|
||||||
|
def dedupl_helper_path(dedupl_id: int) -> Path:
    """
    Maps a dedupl id to a relative three-level path: AAA/BBB/CCC.pbf

    The id is zero-padded to at least 9 digits; the first two groups of
    three digits become directories and the remainder becomes the file name.
    Intended for naming 200 million files such that each subdir has
    max 1000 children.
    """

    padded = format(dedupl_id, '09')
    top_dir, sub_dir, file_stem = padded[:3], padded[3:6], padded[6:]
    return Path(top_dir, sub_dir, file_stem + '.pbf')
|
||||||
|
|
||||||
|
|
||||||
|
def flip_y(zoom, y):
    """
    Mirrors a tile row index within its zoom level: y -> (2**zoom - 1) - y

    Presumably converts between the TMS row numbering stored in mbtiles and
    XYZ row numbering - confirm against the MBTiles spec.
    """
    max_row = 2**zoom - 1
    return max_row - y
|
||||||
|
|
||||||
|
|
||||||
|
# script entry point: hand control to the click command
if __name__ == '__main__':
    cli()
|
||||||
@@ -27,8 +27,8 @@ rm -f image.ext4
|
|||||||
# make a sparse file
|
# make a sparse file
|
||||||
# make sure it's bigger than the current OSM output
|
# make sure it's bigger than the current OSM output
|
||||||
# less fragmentation with fallocate
|
# less fragmentation with fallocate
|
||||||
fallocate -l 150G image.ext4
|
fallocate -l 1500G image.ext4
|
||||||
#truncate -s 150G image.ext4
|
#truncate -s 1500G image.ext4
|
||||||
|
|
||||||
|
|
||||||
mke2fs -t ext4 -v \
|
mke2fs -t ext4 -v \
|
||||||
@@ -42,10 +42,14 @@ mke2fs -t ext4 -v \
|
|||||||
mkdir mnt
|
mkdir mnt
|
||||||
sudo mount -v \
|
sudo mount -v \
|
||||||
-t ext4 \
|
-t ext4 \
|
||||||
-o nobarrier,noatime,data=writeback,commit=100 \
|
-o nobarrier,noatime \
|
||||||
image.ext4 mnt
|
image.ext4 mnt
|
||||||
|
|
||||||
sudo /data/ofm/tile_gen/venv/bin/mb-util output.mbtiles mnt/extract
|
sudo chown ofm:ofm -R mnt
|
||||||
|
|
||||||
|
../../tile_gen/venv/bin/python ../../tile_gen/extract.py output.mbtiles mnt/extract \
|
||||||
|
> "extract_out.log" 2> "extract_err.log"
|
||||||
|
|
||||||
sudo umount mnt
|
sudo umount mnt
|
||||||
|
|
||||||
resize2fs -M image.ext4
|
resize2fs -M image.ext4
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ java -Xmx1g \
|
|||||||
`# Store temporary node locations at fixed positions in a memory-mapped file` \
|
`# Store temporary node locations at fixed positions in a memory-mapped file` \
|
||||||
--nodemap-type=array --storage=mmap \
|
--nodemap-type=array --storage=mmap \
|
||||||
--force \
|
--force \
|
||||||
> "output.log" 2> "err.log"
|
> "planetiler_out.log" 2> "planetiler_err.log"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ java -Xmx30g \
|
|||||||
`# Store temporary node locations at fixed positions in a memory-mapped file` \
|
`# Store temporary node locations at fixed positions in a memory-mapped file` \
|
||||||
--nodemap-type=array --storage=mmap \
|
--nodemap-type=array --storage=mmap \
|
||||||
--force \
|
--force \
|
||||||
> "output.log" 2> "err.log"
|
> "planetiler_out.log" 2> "planetiler_err.log"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user