From: Jakob Cornell Date: Thu, 30 Jun 2022 02:51:55 +0000 (-0500) Subject: Rework DJ verify v1 to handle weird piece lengths X-Git-Url: https://jcornell.net/gitweb/gitweb.cgi?a=commitdiff_plain;h=0cb943ba386902c3a8dff31b2c198f9290fbcada;p=eros.git Rework DJ verify v1 to handle weird piece lengths --- diff --git a/disk_jumble/src/disk_jumble/verify.py b/disk_jumble/src/disk_jumble/verify.py index 2ed32f2..6b309bc 100644 --- a/disk_jumble/src/disk_jumble/verify.py +++ b/disk_jumble/src/disk_jumble/verify.py @@ -8,7 +8,6 @@ import hashlib import heapq import io import itertools -import math from psycopg2.extras import NumericRange import psycopg2 @@ -39,12 +38,18 @@ class _TorrentInfo: return cls(torrent_id, length, info[b"piece length"], hashes) +@dataclass +class _BlockSpan: + blocks: range + initial_offset: int + + @dataclass class _Run: entity_id: bytes entity_length: int piece_num: int - block_ranges: List[range] + block_spans: List[_BlockSpan] hash: bytes @@ -62,7 +67,7 @@ class _BadSector(Exception): def _run_sort_key(run: _Run): - return run.block_ranges[0].start + return run.block_spans[0].blocks.start def _get_target_ranges(conn, disk_id: int, limit: Optional[int]) -> List[range]: @@ -92,20 +97,12 @@ def _get_target_ranges(conn, disk_id: int, limit: Optional[int]) -> List[range]: return ranges -def _get_v1_worklist(conn, disk_id: int, target_ranges: List[NumericRange]) -> List[_V1Run]: +def _get_v1_worklist(conn, disk_id: int, target_ranges: List[NumericRange], block_size: int) -> List[_V1Run]: """ How this works: First, we fetch some info about each torrent on the disk that has data within the requested blocks. Then, we make one query per torrent (because each one may have a different piece size) to determine which disk - blocks to verify. Here's how that works for a given torrent: - - 1. Select slabs on the current disk that have data from the appropriate torrent. - 2. Join with the list of requested block ranges to obtain which slabs and what portions of each are to be verified. - 3. Convert each requested range to a set of piece numbers and deduplicate. - 4. Join again with the slab table to get the complete block ranges required to cover each requested piece. - 5. Order by piece number (to facilitate grouping by piece), then entity offset (to produce an ordered run for each - piece). - - The runs are then reordered by the number of their first disk block. + blocks to verify. The runs are reordered in Python by the number of their first disk block, so the progress of the + run can be easily monitored. 
""" cursor = conn.cursor("v1_worklist") cursor.execute( @@ -126,7 +123,10 @@ def _get_v1_worklist(conn, disk_id: int, target_ranges: List[NumericRange]) -> L ] for i in infos: - if i.piece_length % BLOCK_SIZE != 0: + # precondition for disk read logic below + assert i.piece_length >= block_size + + if i.piece_length % block_size != 0: warn(f"entity {i.id.hex()} has invalid piece length") runs = [] @@ -136,10 +136,12 @@ def _get_v1_worklist(conn, disk_id: int, target_ranges: List[NumericRange]) -> L """ with relevant_slab as ( + -- select slabs on this disk select * from diskjumble.slab where disk_id = %(disk_id)s and entity_id = %(entity_id)s ), targeted_slab as ( + -- select slabs that cover our requested block range select relevant_slab.*, target_range * disk_blocks as target_span from relevant_slab @@ -147,40 +149,54 @@ def _get_v1_worklist(conn, disk_id: int, target_ranges: List[NumericRange]) -> L on target_range && disk_blocks ), targeted_piece as ( + -- select piece numbers that cover those slabs select distinct - ( - entity_offset - + ( - (generate_series(lower(target_span), upper(target_span) - 1) - lower(disk_blocks)) - * %(block_size)s - ) - ) / %(piece_length)s + generate_series( + (entity_offset + (lower(target_span) - lower(disk_blocks)) * %(block_size)s) / %(piece_length)s, + ceil( + (entity_offset + (upper(target_span) - lower(disk_blocks)) * %(block_size)s)::numeric(38, 19) + / %(piece_length)s + )::bigint - 1 + ) as piece_num from targeted_slab ), - out as ( + out_a as ( + -- select slab info for each piece we're checking select entity_offset, disk_blocks, piece_num, - disk_blocks * int8range( - (piece_num * %(piece_length)s - entity_offset) / %(block_size)s + lower(disk_blocks), - ((piece_num + 1) * %(piece_length)s - entity_offset) / %(block_size)s + lower(disk_blocks) - ) as use_blocks, + int8range( + piece_num * %(piece_length)s - entity_offset, + (piece_num + 1) * %(piece_length)s - entity_offset + ) as range_in_slab, -- byte range of piece within slab crypt_key from relevant_slab cross join targeted_piece + -- the join condition is the 'where' of the main query + ), + out_b as ( + select + entity_offset, disk_blocks, piece_num, crypt_key, + disk_blocks * int8range( + lower(range_in_slab) / %(block_size)s + lower(disk_blocks), + ceil(upper(range_in_slab)::numeric(38, 19) / %(block_size)s)::bigint + lower(disk_blocks) + ) as use_blocks, + case when lower(range_in_slab) >= 0 then lower(range_in_slab) %% %(block_size)s else 0 end + as initial_offset + from out_a ) - select piece_num, use_blocks, crypt_key from out + select piece_num, use_blocks, initial_offset, crypt_key from out_b where not isempty(use_blocks) order by - piece_num, - entity_offset + (lower(use_blocks) - lower(disk_blocks)) * %(block_size)s + piece_num, -- facilitate grouping by piece + entity_offset + (lower(use_blocks) - lower(disk_blocks)) * %(block_size)s -- ordered run within each piece """, { "disk_id": disk_id, "entity_id": info.id, "target_ranges": target_ranges, - "block_size": BLOCK_SIZE, + "block_size": block_size, "piece_length": info.piece_length, } ) @@ -190,27 +206,30 @@ def _get_v1_worklist(conn, disk_id: int, target_ranges: List[NumericRange]) -> L raise NotImplementedError("verify of encrypted data") for (piece_num, piece_rows) in itertools.groupby(rows, lambda t: t[0]): - block_ranges = [range(r.lower, r.upper) for (_, r, _) in piece_rows] + block_spans = [ + _BlockSpan(range(r.lower, r.upper), off) + for (_, r, off, _) in piece_rows + ] run = _V1Run( entity_id = info.id, 
entity_length = info.length, piece_length = info.piece_length, piece_num = piece_num, - block_ranges = block_ranges, + block_spans = block_spans, hash = info.hashes[piece_num], ) # Make sure we ended up with all the block ranges for the piece. If not, assume the piece isn't fully # written yet and skip. run_length = min(info.piece_length, info.length - piece_num * info.piece_length) - if sum(map(len, run.block_ranges)) == math.ceil(run_length / BLOCK_SIZE): + if sum(len(s.blocks) * block_size - s.initial_offset for s in run.block_spans) >= run_length: runs.append(run) runs.sort(key = _run_sort_key) return runs -def _get_v2_worklist(conn, disk_id: int, target_ranges: List[NumericRange]) -> Iterator[_V2Run]: +def _get_v2_worklist(conn, disk_id: int, target_ranges: List[NumericRange], block_size: int) -> Iterator[_V2Run]: cursor = conn.cursor("v2_worklist") cursor.execute( """ @@ -258,7 +277,7 @@ def _get_v2_worklist(conn, disk_id: int, target_ranges: List[NumericRange]) -> I order by block """, { - "block_size": BLOCK_SIZE, + "block_size": block_size, "disk_id": disk_id, "target_ranges": target_ranges, } @@ -270,21 +289,24 @@ def _get_v2_worklist(conn, disk_id: int, target_ranges: List[NumericRange]) -> I entity_id = bytes(entity_id), entity_length = entity_length, piece_num = piece_num, - block_ranges = [range(block, block + 1)], + block_spans = [_BlockSpan(range(block, block + 1), 0)], hash = bytes(hash_), ) else: raise NotImplementedError("verify of encrypted data") -def _do_verify(conn, disk_id: int, target_ranges: List[range], disk_file: io.BufferedIOBase, read_size: int, read_tries: int): +def _do_verify( + conn, disk_id: int, target_ranges: List[range], disk_file: io.BufferedIOBase, read_size: int, read_tries: int, + block_size: int +): pg_target_ranges = [NumericRange(r.start, r.stop) for r in target_ranges] # transaction is required for named cursors in worklist generation with conn: worklist = heapq.merge( - _get_v1_worklist(conn, disk_id, pg_target_ranges), - _get_v2_worklist(conn, disk_id, pg_target_ranges), + _get_v1_worklist(conn, disk_id, pg_target_ranges, block_size), + _get_v2_worklist(conn, disk_id, pg_target_ranges, block_size), key = _run_sort_key, ) @@ -292,18 +314,23 @@ def _do_verify(conn, disk_id: int, target_ranges: List[range], disk_file: io.Buf fails = [] for run in worklist: if isinstance(run, _V1Run): + piece_length = run.piece_length hasher = hashlib.sha1() - entity_off = run.piece_num * run.piece_length else: + piece_length = block_size hasher = hashlib.sha256() - entity_off = run.piece_num * BLOCK_SIZE assert len(run.hash) == hasher.digest_size, "incorrect validation hash length" + entity_off = run.piece_num * piece_length + piece_end_ent = min(entity_off + piece_length, run.entity_length) + block_ranges = [s.blocks for s in run.block_spans] try: - for range_ in run.block_ranges: - for block in range_: - read_start = block * BLOCK_SIZE - read_end = read_start + min(BLOCK_SIZE, run.entity_length - entity_off) + for span in run.block_spans: + for (i, block) in enumerate(span.blocks): + block_offset = span.initial_offset if i == 0 else 0 + read_start = block * block_size + block_offset + read_size = min(block_size - block_offset, piece_end_ent - entity_off) + read_end = read_start + read_size disk_file.seek(read_start) while disk_file.tell() < read_end: pos = disk_file.tell() @@ -319,14 +346,14 @@ def _do_verify(conn, disk_id: int, target_ranges: List[range], disk_file: io.Buf assert data hasher.update(data) - entity_off += BLOCK_SIZE + entity_off += read_size 
except _BadSector: - fails.extend(run.block_ranges) + fails.extend(block_ranges) else: if hasher.digest() == run.hash: - passes.extend(run.block_ranges) + passes.extend(block_ranges) else: - fails.extend(run.block_ranges) + fails.extend(block_ranges) def clean_up(ranges): out = [] @@ -401,4 +428,4 @@ if __name__ == "__main__": with contextlib.closing(psycopg2.connect("")) as conn: target_ranges = _get_target_ranges(conn, args.disk_id, args.block_limit) with open(path, "rb", buffering = _READ_BUFFER_SIZE) as disk_file: - _do_verify(conn, args.disk_id, target_ranges, disk_file, _READ_BUFFER_SIZE, args.read_tries) + _do_verify(conn, args.disk_id, target_ranges, disk_file, _READ_BUFFER_SIZE, args.read_tries, BLOCK_SIZE)
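
The targeted_piece CTE above selects every piece number whose byte range overlaps the targeted portion of a slab, using floor division for the first piece and ceiling division past the last so that piece lengths which are not a multiple of the block size are still covered. A minimal Python sketch of that arithmetic, assuming the same int8range conventions (exclusive upper bounds); the function and parameter names are illustrative only, not part of the patch:

    def pieces_covering_span(entity_offset, span_start_block, span_stop_block,
                             slab_start_block, block_size, piece_length):
        # Entity byte range covered by the targeted blocks of the slab
        # (span_stop_block is exclusive, like upper() of an int8range).
        first_byte = entity_offset + (span_start_block - slab_start_block) * block_size
        end_byte = entity_offset + (span_stop_block - slab_start_block) * block_size
        # Every piece whose byte range overlaps [first_byte, end_byte).
        first_piece = first_byte // piece_length
        end_piece = -(-end_byte // piece_length)  # ceiling division
        return range(first_piece, end_piece)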
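
The out_a and out_b CTEs then map each selected piece back to the slab's disk blocks that hold it, together with the byte offset of the piece within its first block (the initial_offset now carried by _BlockSpan). A rough Python equivalent, with clamping assumed from the disk_blocks * int8range(...) intersection in the query; again the names are made up for illustration:

    def block_span_for_piece(piece_num, piece_length, entity_offset, slab_blocks, block_size):
        # Byte range of the piece relative to the slab's entity data
        # (range_in_slab in the query); it may extend past either end of the slab.
        start = piece_num * piece_length - entity_offset
        stop = (piece_num + 1) * piece_length - entity_offset
        # Disk blocks of the slab holding any of those bytes, clamped to the slab.
        first = max(start // block_size + slab_blocks.start, slab_blocks.start)
        last = min(-(-stop // block_size) + slab_blocks.start, slab_blocks.stop)
        # Offset of the piece's first byte within its first block; zero when the
        # piece begins before this slab, matching the 'case when' in out_b.
        initial_offset = start % block_size if start >= 0 else 0
        return range(first, last), initial_offset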
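
A small worked example of how a "weird" piece length plays out, using the two sketches above with hypothetical numbers (16 KiB blocks and 100 000-byte pieces; none of these values come from the repository):

    block_size = 16384        # 16 KiB disk blocks
    piece_length = 100_000    # not a multiple of the block size
    entity_offset = 0
    slab_blocks = range(50, 200)

    # If the target covers disk blocks [56, 59) of this slab, pieces 0 and 1 both
    # have data there and need checking.
    assert pieces_covering_span(entity_offset, 56, 59, slab_blocks.start,
                                block_size, piece_length) == range(0, 2)

    # Piece 1 occupies entity bytes [100000, 200000): disk blocks 56..62, starting
    # 1696 bytes into block 56.
    blocks, initial_offset = block_span_for_piece(1, piece_length, entity_offset,
                                                  slab_blocks, block_size)
    assert blocks == range(56, 63)
    assert initial_offset == 1696

    # The coverage test in _get_v1_worklist: the spans supply at least enough bytes
    # for the whole piece (seven blocks minus the leading offset).
    covered = len(blocks) * block_size - initial_offset
    assert covered == 112_992
    assert covered >= piece_length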