Fix verify v2 hash offset computation bug, refactor query
author Jakob Cornell <jakob+gpg@jcornell.net>
Wed, 20 Apr 2022 23:57:08 +0000 (18:57 -0500)
committer Jakob Cornell <jakob+gpg@jcornell.net>
Wed, 20 Apr 2022 23:57:08 +0000 (18:57 -0500)
disk_jumble/src/disk_jumble/verify.py

index b5217f4aee842060da3dcd91b28a993833ad4b60..e2c736d2f05ae1c152c05eacb1c79951781dec53 100644 (file)
@@ -221,11 +221,12 @@ def _get_v2_worklist(conn, disk_id: int, target_ranges: List[NumericRange]) -> L
                                        from diskjumble.slab
                                        where disk_id = %(disk_id)s
                                ),
-                               joined as (
+                               with_hashes as (
                                        select
                                                entity_id, entity.length, entity_blocks,
                                                slab_plus.disk_blocks, crypt_key,
                                                hashes,
+                                               elh.block_range as elh_range,
                                                entity_blocks * elh.block_range as check_erange
                                        from
                                                public.entityv2_leaf_hashes elh
@@ -233,20 +234,16 @@ def _get_v2_worklist(conn, disk_id: int, target_ranges: List[NumericRange]) -> L
                                                left outer join public.entity using (entity_id)
                                ),
                                filtered as (
-                                       select * from joined where not isempty(check_erange)
+                                       select *, generate_series(lower(check_erange), upper(check_erange) - 1) as piece_num
+                                       from (select * from with_hashes where not isempty(check_erange))
                                ),
                                exploded as (
                                        select
-                                               entity_id,
-                                               length,
-                                               generate_series(lower(check_erange), upper(check_erange) - 1) as piece_num,
-                                               (
-                                                       generate_series(lower(check_erange), upper(check_erange) - 1)
-                                                       - lower(entity_blocks) + lower(disk_blocks)
-                                               ) as block,
+                                               entity_id, length, piece_num,
+                                               piece_num - lower(entity_blocks) + lower(disk_blocks) as block,
                                                substring(
                                                        hashes,
-                                                       generate_series(lower(check_erange), upper(check_erange) - 1)::integer * 32 + 1,
+                                                       (piece_num - lower(elh_range))::integer * 32 + 1,
                                                        32
                                                ) as hash,
                                                crypt_key
@@ -311,6 +308,7 @@ def _do_verify(conn, disk_id: int, target_ranges: List[range], disk_file: io.Buf
                else:
                        hasher = hashlib.sha256()
                        entity_off = run.piece_num * BLOCK_SIZE
+               assert len(run.hash) == hasher.digest_size, "incorrect validation hash length"
 
                try:
                        for range_ in run.block_ranges: