1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * Copyright 2023 Red Hat
4   */
5  
6  #include "encodings.h"
7  
8  #include <linux/log2.h>
9  
10  #include "logger.h"
11  #include "memory-alloc.h"
12  #include "permassert.h"
13  
14  #include "constants.h"
15  #include "status-codes.h"
16  #include "types.h"
17  
/** The maximum logical space is 4 petabytes, which is 1 terablock. */
static const block_count_t MAXIMUM_VDO_LOGICAL_BLOCKS = 1024ULL * 1024 * 1024 * 1024;

/** The maximum physical space is 256 terabytes, which is 64 gigablocks. */
static const block_count_t MAXIMUM_VDO_PHYSICAL_BLOCKS = 1024ULL * 1024 * 1024 * 64;

/*
 * On-disk framing of the geometry block: magic number, versioned header, and
 * trailing checksum. The volume_geometry fields are encoded between the header
 * and the checksum; __packed keeps the struct layout identical to the on-disk
 * byte layout.
 */
struct geometry_block {
	char magic_number[VDO_GEOMETRY_MAGIC_NUMBER_SIZE];
	struct packed_header header;
	u32 checksum;
} __packed;
29  
/* Header for the current (5.0) geometry block encoding. */
static const struct header GEOMETRY_BLOCK_HEADER_5_0 = {
	.id = VDO_GEOMETRY_BLOCK,
	.version = {
		.major_version = 5,
		.minor_version = 0,
	},
	/*
	 * Note: this size isn't just the payload size following the header, like it is everywhere
	 * else in VDO.
	 */
	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry),
};

/*
 * Header for the legacy (4.0) geometry block encoding, which does not include
 * the bio_offset field (see decode_volume_geometry()).
 */
static const struct header GEOMETRY_BLOCK_HEADER_4_0 = {
	.id = VDO_GEOMETRY_BLOCK,
	.version = {
		.major_version = 4,
		.minor_version = 0,
	},
	/*
	 * Note: this size isn't just the payload size following the header, like it is everywhere
	 * else in VDO.
	 */
	.size = sizeof(struct geometry_block) + sizeof(struct volume_geometry_4_0),
};

/* On-disk magic identifying a geometry block; the +1 holds the terminating NUL. */
const u8 VDO_GEOMETRY_MAGIC_NUMBER[VDO_GEOMETRY_MAGIC_NUMBER_SIZE + 1] = "dmvdo001";
57  
/* Expected packed size of block_map_page_header; checked via BUILD_BUG_ON below. */
#define PAGE_HEADER_4_1_SIZE (8 + 8 + 8 + 1 + 1 + 1 + 1)

/* The on-disk version stamped into every block map page. */
static const struct version_number BLOCK_MAP_4_1 = {
	.major_version = 4,
	.minor_version = 1,
};

const struct header VDO_BLOCK_MAP_HEADER_2_0 = {
	.id = VDO_BLOCK_MAP,
	.version = {
		.major_version = 2,
		.minor_version = 0,
	},
	.size = sizeof(struct block_map_state_2_0),
};

const struct header VDO_RECOVERY_JOURNAL_HEADER_7_0 = {
	.id = VDO_RECOVERY_JOURNAL,
	.version = {
			.major_version = 7,
			.minor_version = 0,
		},
	.size = sizeof(struct recovery_journal_state_7_0),
};

const struct header VDO_SLAB_DEPOT_HEADER_2_0 = {
	.id = VDO_SLAB_DEPOT,
	.version = {
		.major_version = 2,
		.minor_version = 0,
	},
	.size = sizeof(struct slab_depot_state_2_0),
};

static const struct header VDO_LAYOUT_HEADER_3_0 = {
	.id = VDO_LAYOUT,
	.version = {
		.major_version = 3,
		.minor_version = 0,
	},
	.size = sizeof(struct layout_3_0) + (sizeof(struct partition_3_0) * VDO_PARTITION_COUNT),
};

/* The partitions which must be present in a decoded layout. */
static const enum partition_id REQUIRED_PARTITIONS[] = {
	VDO_BLOCK_MAP_PARTITION,
	VDO_SLAB_DEPOT_PARTITION,
	VDO_RECOVERY_JOURNAL_PARTITION,
	VDO_SLAB_SUMMARY_PARTITION,
};
107  
/*
 * The current version for the data encoded in the super block. This must be changed any time there
 * is a change to encoding of the component data of any VDO component.
 */
static const struct version_number VDO_COMPONENT_DATA_41_0 = {
	.major_version = 41,
	.minor_version = 0,
};

/* The version of the overall volume format. */
const struct version_number VDO_VOLUME_VERSION_67_0 = {
	.major_version = 67,
	.minor_version = 0,
};

static const struct header SUPER_BLOCK_HEADER_12_0 = {
	.id = VDO_SUPER_BLOCK,
	.version = {
			.major_version = 12,
			.minor_version = 0,
		},

	/* This is the minimum size, if the super block contains no components. */
	.size = VDO_SUPER_BLOCK_FIXED_SIZE - VDO_ENCODED_HEADER_SIZE,
};
132  
133  /**
134   * validate_version() - Check whether a version matches an expected version.
135   * @expected_version: The expected version.
136   * @actual_version: The version being validated.
137   * @component_name: The name of the component or the calling function (for error logging).
138   *
139   * Logs an error describing a mismatch.
140   *
141   * Return: VDO_SUCCESS             if the versions are the same,
142   *         VDO_UNSUPPORTED_VERSION if the versions don't match.
143   */
validate_version(struct version_number expected_version,struct version_number actual_version,const char * component_name)144  static int __must_check validate_version(struct version_number expected_version,
145  					 struct version_number actual_version,
146  					 const char *component_name)
147  {
148  	if (!vdo_are_same_version(expected_version, actual_version)) {
149  		return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
150  					      "%s version mismatch, expected %d.%d, got %d.%d",
151  					      component_name,
152  					      expected_version.major_version,
153  					      expected_version.minor_version,
154  					      actual_version.major_version,
155  					      actual_version.minor_version);
156  	}
157  
158  	return VDO_SUCCESS;
159  }
160  
161  /**
162   * vdo_validate_header() - Check whether a header matches expectations.
163   * @expected_header: The expected header.
164   * @actual_header: The header being validated.
165   * @exact_size: If true, the size fields of the two headers must be the same, otherwise it is
166   *              required that actual_header.size >= expected_header.size.
167   * @name: The name of the component or the calling function (for error logging).
168   *
169   * Logs an error describing the first mismatch found.
170   *
171   * Return: VDO_SUCCESS             if the header meets expectations,
172   *         VDO_INCORRECT_COMPONENT if the component ids don't match,
173   *         VDO_UNSUPPORTED_VERSION if the versions or sizes don't match.
174   */
vdo_validate_header(const struct header * expected_header,const struct header * actual_header,bool exact_size,const char * name)175  int vdo_validate_header(const struct header *expected_header,
176  			const struct header *actual_header, bool exact_size,
177  			const char *name)
178  {
179  	int result;
180  
181  	if (expected_header->id != actual_header->id) {
182  		return vdo_log_error_strerror(VDO_INCORRECT_COMPONENT,
183  					      "%s ID mismatch, expected %d, got %d",
184  					      name, expected_header->id,
185  					      actual_header->id);
186  	}
187  
188  	result = validate_version(expected_header->version, actual_header->version,
189  				  name);
190  	if (result != VDO_SUCCESS)
191  		return result;
192  
193  	if ((expected_header->size > actual_header->size) ||
194  	    (exact_size && (expected_header->size < actual_header->size))) {
195  		return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
196  					      "%s size mismatch, expected %zu, got %zu",
197  					      name, expected_header->size,
198  					      actual_header->size);
199  	}
200  
201  	return VDO_SUCCESS;
202  }
203  
/* Pack a version number into its little-endian on-disk form and append it to buffer. */
static void encode_version_number(u8 *buffer, size_t *offset,
				  struct version_number version)
{
	struct packed_version_number on_disk = vdo_pack_version_number(version);

	memcpy(&buffer[*offset], &on_disk, sizeof(on_disk));
	*offset += sizeof(on_disk);
}
212  
/* Pack a header into its on-disk form and append it to buffer, advancing *offset. */
void vdo_encode_header(u8 *buffer, size_t *offset, const struct header *header)
{
	struct packed_header on_disk = vdo_pack_header(header);

	memcpy(&buffer[*offset], &on_disk, sizeof(on_disk));
	*offset += sizeof(on_disk);
}
220  
/* Read a packed version number from buffer, advancing *offset, and unpack it. */
static void decode_version_number(u8 *buffer, size_t *offset,
				  struct version_number *version)
{
	struct packed_version_number on_disk;

	memcpy(&on_disk, &buffer[*offset], sizeof(on_disk));
	*offset += sizeof(on_disk);
	*version = vdo_unpack_version_number(on_disk);
}
230  
/* Read a packed header from buffer, advancing *offset, and unpack it. */
void vdo_decode_header(u8 *buffer, size_t *offset, struct header *header)
{
	struct packed_header on_disk;

	memcpy(&on_disk, &buffer[*offset], sizeof(on_disk));
	*offset += sizeof(on_disk);
	*header = vdo_unpack_header(&on_disk);
}
240  
241  /**
242   * decode_volume_geometry() - Decode the on-disk representation of a volume geometry from a buffer.
243   * @buffer: A buffer to decode from.
244   * @offset: The offset in the buffer at which to decode.
245   * @geometry: The structure to receive the decoded fields.
246   * @version: The geometry block version to decode.
247   */
decode_volume_geometry(u8 * buffer,size_t * offset,struct volume_geometry * geometry,u32 version)248  static void decode_volume_geometry(u8 *buffer, size_t *offset,
249  				   struct volume_geometry *geometry, u32 version)
250  {
251  	u32 unused, mem;
252  	enum volume_region_id id;
253  	nonce_t nonce;
254  	block_count_t bio_offset = 0;
255  	bool sparse;
256  
257  	/* This is for backwards compatibility. */
258  	decode_u32_le(buffer, offset, &unused);
259  	geometry->unused = unused;
260  
261  	decode_u64_le(buffer, offset, &nonce);
262  	geometry->nonce = nonce;
263  
264  	memcpy((unsigned char *) &geometry->uuid, buffer + *offset, sizeof(uuid_t));
265  	*offset += sizeof(uuid_t);
266  
267  	if (version > 4)
268  		decode_u64_le(buffer, offset, &bio_offset);
269  	geometry->bio_offset = bio_offset;
270  
271  	for (id = 0; id < VDO_VOLUME_REGION_COUNT; id++) {
272  		physical_block_number_t start_block;
273  		enum volume_region_id saved_id;
274  
275  		decode_u32_le(buffer, offset, &saved_id);
276  		decode_u64_le(buffer, offset, &start_block);
277  
278  		geometry->regions[id] = (struct volume_region) {
279  			.id = saved_id,
280  			.start_block = start_block,
281  		};
282  	}
283  
284  	decode_u32_le(buffer, offset, &mem);
285  	*offset += sizeof(u32);
286  	sparse = buffer[(*offset)++];
287  
288  	geometry->index_config = (struct index_config) {
289  		.mem = mem,
290  		.sparse = sparse,
291  	};
292  }
293  
/**
 * vdo_parse_geometry_block() - Decode and validate an encoded geometry block.
 * @block: The encoded geometry block (raw on-disk bytes).
 * @geometry: The structure to receive the decoded fields.
 *
 * Return: VDO_SUCCESS on success,
 *         VDO_BAD_MAGIC if the magic number does not match,
 *         VDO_CHECKSUM_MISMATCH if the stored checksum is wrong,
 *         or an error from header validation.
 */
int __must_check vdo_parse_geometry_block(u8 *block, struct volume_geometry *geometry)
{
	u32 checksum, saved_checksum;
	struct header header;
	size_t offset = 0;
	int result;

	if (memcmp(block, VDO_GEOMETRY_MAGIC_NUMBER, VDO_GEOMETRY_MAGIC_NUMBER_SIZE) != 0)
		return VDO_BAD_MAGIC;
	offset += VDO_GEOMETRY_MAGIC_NUMBER_SIZE;

	/* The header's major version selects which geometry encoding follows. */
	vdo_decode_header(block, &offset, &header);
	if (header.version.major_version <= 4) {
		result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_4_0, &header,
					     true, __func__);
	} else {
		result = vdo_validate_header(&GEOMETRY_BLOCK_HEADER_5_0, &header,
					     true, __func__);
	}
	if (result != VDO_SUCCESS)
		return result;

	decode_volume_geometry(block, &offset, geometry, header.version.major_version);

	/* The header size covers everything up to (but not including) the checksum. */
	result = VDO_ASSERT(header.size == offset + sizeof(u32),
			    "should have decoded up to the geometry checksum");
	if (result != VDO_SUCCESS)
		return result;

	/*
	 * Decode and verify the checksum. The checksum covers every byte that
	 * precedes it, so it must be computed before offset is advanced past it.
	 */
	checksum = vdo_crc32(block, offset);
	decode_u32_le(block, &offset, &saved_checksum);

	return ((checksum == saved_checksum) ? VDO_SUCCESS : VDO_CHECKSUM_MISMATCH);
}
334  
/*
 * Initialize buffer as an empty block map page: zero the whole block, then
 * stamp the 4.1 page version and the identifying header fields.
 */
struct block_map_page *vdo_format_block_map_page(void *buffer, nonce_t nonce,
						 physical_block_number_t pbn,
						 bool initialized)
{
	struct block_map_page *map_page = buffer;

	memset(buffer, 0, VDO_BLOCK_SIZE);
	map_page->version = vdo_pack_version_number(BLOCK_MAP_4_1);
	map_page->header.pbn = __cpu_to_le64(pbn);
	map_page->header.nonce = __cpu_to_le64(nonce);
	map_page->header.initialized = initialized;
	return map_page;
}
348  
/*
 * Classify a block map page: INVALID if the version, initialized flag, or
 * nonce is wrong; BAD if it claims a different physical block; VALID otherwise.
 */
enum block_map_page_validity vdo_validate_block_map_page(struct block_map_page *page,
							 nonce_t nonce,
							 physical_block_number_t pbn)
{
	/* Guard against the packed header layout drifting from the on-disk format. */
	BUILD_BUG_ON(sizeof(struct block_map_page_header) != PAGE_HEADER_4_1_SIZE);

	if (!vdo_are_same_version(BLOCK_MAP_4_1,
				  vdo_unpack_version_number(page->version)))
		return VDO_BLOCK_MAP_PAGE_INVALID;

	if (!page->header.initialized)
		return VDO_BLOCK_MAP_PAGE_INVALID;

	if (nonce != __le64_to_cpu(page->header.nonce))
		return VDO_BLOCK_MAP_PAGE_INVALID;

	if (pbn != vdo_get_block_map_page_pbn(page))
		return VDO_BLOCK_MAP_PAGE_BAD;

	return VDO_BLOCK_MAP_PAGE_VALID;
}
365  
/**
 * decode_block_map_state_2_0() - Decode block map component state version 2.0 from a buffer.
 * @buffer: The buffer containing the saved state.
 * @offset: The offset at which to decode; advanced past the decoded bytes.
 * @state: A pointer to receive the result of a successful decode.
 *
 * The flat page fields are vestigial and must hold their fixed legacy values.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int decode_block_map_state_2_0(u8 *buffer, size_t *offset,
				      struct block_map_state_2_0 *state)
{
	size_t initial_offset;
	block_count_t flat_page_count, root_count;
	physical_block_number_t flat_page_origin, root_origin;
	struct header header;
	int result;

	vdo_decode_header(buffer, offset, &header);
	result = vdo_validate_header(&VDO_BLOCK_MAP_HEADER_2_0, &header, true, __func__);
	if (result != VDO_SUCCESS)
		return result;

	initial_offset = *offset;

	decode_u64_le(buffer, offset, &flat_page_origin);
	/*
	 * Log the decoded local value: *state has not been assigned yet, so
	 * reading state->flat_page_origin here would report stale memory.
	 */
	result = VDO_ASSERT(flat_page_origin == VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
			    "Flat page origin must be %u (recorded as %llu)",
			    VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN,
			    (unsigned long long) flat_page_origin);
	if (result != VDO_SUCCESS)
		return result;

	decode_u64_le(buffer, offset, &flat_page_count);
	result = VDO_ASSERT(flat_page_count == 0,
			    "Flat page count must be 0 (recorded as %llu)",
			    (unsigned long long) flat_page_count);
	if (result != VDO_SUCCESS)
		return result;

	decode_u64_le(buffer, offset, &root_origin);
	decode_u64_le(buffer, offset, &root_count);

	result = VDO_ASSERT(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - initial_offset,
			    "decoded block map component size must match header size");
	if (result != VDO_SUCCESS)
		return result;

	*state = (struct block_map_state_2_0) {
		.flat_page_origin = flat_page_origin,
		.flat_page_count = flat_page_count,
		.root_origin = root_origin,
		.root_count = root_count,
	};

	return VDO_SUCCESS;
}
414  
/* Encode block map component state version 2.0 into a buffer. */
static void encode_block_map_state_2_0(u8 *buffer, size_t *offset,
				       struct block_map_state_2_0 state)
{
	size_t payload_start;

	vdo_encode_header(buffer, offset, &VDO_BLOCK_MAP_HEADER_2_0);
	payload_start = *offset;

	encode_u64_le(buffer, offset, state.flat_page_origin);
	encode_u64_le(buffer, offset, state.flat_page_count);
	encode_u64_le(buffer, offset, state.root_origin);
	encode_u64_le(buffer, offset, state.root_count);

	/* The header's declared size must equal the bytes actually written. */
	VDO_ASSERT_LOG_ONLY(VDO_BLOCK_MAP_HEADER_2_0.size == *offset - payload_start,
			    "encoded block map component size must match header size");
}
431  
/**
 * vdo_compute_new_forest_pages() - Compute the number of pages which must be allocated at each
 *                                  level in order to grow the forest to a new number of entries.
 * @root_count: The number of roots in the forest.
 * @old_sizes: The current size of each level, or NULL if the forest is being created.
 * @entries: The new number of entries the block map must address.
 * @new_sizes: Receives the computed new size of each level.
 *
 * Return: The total number of non-leaf pages required.
 */
block_count_t vdo_compute_new_forest_pages(root_count_t root_count,
					   struct boundary *old_sizes,
					   block_count_t entries,
					   struct boundary *new_sizes)
{
	/* At least one leaf page is always required, even for zero entries. */
	page_count_t leaf_pages = max(vdo_compute_block_map_page_count(entries), 1U);
	page_count_t level_size = DIV_ROUND_UP(leaf_pages, root_count);
	block_count_t total_pages = 0;
	height_t height;

	for (height = 0; height < VDO_BLOCK_MAP_TREE_HEIGHT; height++) {
		block_count_t new_pages;

		/* Each interior level needs one page per ENTRIES_PER_PAGE children. */
		level_size = DIV_ROUND_UP(level_size, VDO_BLOCK_MAP_ENTRIES_PER_PAGE);
		new_sizes->levels[height] = level_size;
		new_pages = level_size;
		if (old_sizes != NULL)
			new_pages -= old_sizes->levels[height];
		total_pages += (new_pages * root_count);
	}

	return total_pages;
}
462  
/**
 * encode_recovery_journal_state_7_0() - Encode the state of a recovery journal into a buffer.
 * @buffer: The buffer to encode into.
 * @offset: The offset at which to encode; advanced past the encoded bytes.
 * @state: The recovery journal state to encode.
 */
static void encode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
					      struct recovery_journal_state_7_0 state)
{
	size_t initial_offset;

	vdo_encode_header(buffer, offset, &VDO_RECOVERY_JOURNAL_HEADER_7_0);

	initial_offset = *offset;
	encode_u64_le(buffer, offset, state.journal_start);
	encode_u64_le(buffer, offset, state.logical_blocks_used);
	encode_u64_le(buffer, offset, state.block_map_data_blocks);

	/* The header's declared size must equal the bytes actually written. */
	VDO_ASSERT_LOG_ONLY(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
			    "encoded recovery journal component size must match header size");
}
483  
484  /**
485   * decode_recovery_journal_state_7_0() - Decode the state of a recovery journal saved in a buffer.
486   * @buffer: The buffer containing the saved state.
487   * @state: A pointer to a recovery journal state to hold the result of a successful decode.
488   *
489   * Return: VDO_SUCCESS or an error code.
490   */
decode_recovery_journal_state_7_0(u8 * buffer,size_t * offset,struct recovery_journal_state_7_0 * state)491  static int __must_check decode_recovery_journal_state_7_0(u8 *buffer, size_t *offset,
492  							  struct recovery_journal_state_7_0 *state)
493  {
494  	struct header header;
495  	int result;
496  	size_t initial_offset;
497  	sequence_number_t journal_start;
498  	block_count_t logical_blocks_used, block_map_data_blocks;
499  
500  	vdo_decode_header(buffer, offset, &header);
501  	result = vdo_validate_header(&VDO_RECOVERY_JOURNAL_HEADER_7_0, &header, true,
502  				     __func__);
503  	if (result != VDO_SUCCESS)
504  		return result;
505  
506  	initial_offset = *offset;
507  	decode_u64_le(buffer, offset, &journal_start);
508  	decode_u64_le(buffer, offset, &logical_blocks_used);
509  	decode_u64_le(buffer, offset, &block_map_data_blocks);
510  
511  	result = VDO_ASSERT(VDO_RECOVERY_JOURNAL_HEADER_7_0.size == *offset - initial_offset,
512  			    "decoded recovery journal component size must match header size");
513  	if (result != VDO_SUCCESS)
514  		return result;
515  
516  	*state = (struct recovery_journal_state_7_0) {
517  		.journal_start = journal_start,
518  		.logical_blocks_used = logical_blocks_used,
519  		.block_map_data_blocks = block_map_data_blocks,
520  	};
521  
522  	return VDO_SUCCESS;
523  }
524  
525  /**
526   * vdo_get_journal_operation_name() - Get the name of a journal operation.
527   * @operation: The operation to name.
528   *
529   * Return: The name of the operation.
530   */
vdo_get_journal_operation_name(enum journal_operation operation)531  const char *vdo_get_journal_operation_name(enum journal_operation operation)
532  {
533  	switch (operation) {
534  	case VDO_JOURNAL_DATA_REMAPPING:
535  		return "data remapping";
536  
537  	case VDO_JOURNAL_BLOCK_MAP_REMAPPING:
538  		return "block map remapping";
539  
540  	default:
541  		return "unknown journal operation";
542  	}
543  }
544  
545  /**
546   * encode_slab_depot_state_2_0() - Encode the state of a slab depot into a buffer.
547   */
encode_slab_depot_state_2_0(u8 * buffer,size_t * offset,struct slab_depot_state_2_0 state)548  static void encode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
549  					struct slab_depot_state_2_0 state)
550  {
551  	size_t initial_offset;
552  
553  	vdo_encode_header(buffer, offset, &VDO_SLAB_DEPOT_HEADER_2_0);
554  
555  	initial_offset = *offset;
556  	encode_u64_le(buffer, offset, state.slab_config.slab_blocks);
557  	encode_u64_le(buffer, offset, state.slab_config.data_blocks);
558  	encode_u64_le(buffer, offset, state.slab_config.reference_count_blocks);
559  	encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocks);
560  	encode_u64_le(buffer, offset, state.slab_config.slab_journal_flushing_threshold);
561  	encode_u64_le(buffer, offset, state.slab_config.slab_journal_blocking_threshold);
562  	encode_u64_le(buffer, offset, state.slab_config.slab_journal_scrubbing_threshold);
563  	encode_u64_le(buffer, offset, state.first_block);
564  	encode_u64_le(buffer, offset, state.last_block);
565  	buffer[(*offset)++] = state.zone_count;
566  
567  	VDO_ASSERT_LOG_ONLY(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
568  			    "encoded block map component size must match header size");
569  }
570  
/**
 * decode_slab_depot_state_2_0() - Decode slab depot component state version 2.0 from a buffer.
 * @buffer: The buffer containing the saved state.
 * @offset: The offset at which to decode; advanced past the decoded bytes.
 * @state: A pointer to receive the result of a successful decode.
 *
 * The field order must exactly mirror encode_slab_depot_state_2_0().
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int decode_slab_depot_state_2_0(u8 *buffer, size_t *offset,
				       struct slab_depot_state_2_0 *state)
{
	struct header header;
	int result;
	size_t initial_offset;
	struct slab_config slab_config;
	block_count_t count;
	physical_block_number_t first_block, last_block;
	zone_count_t zone_count;

	vdo_decode_header(buffer, offset, &header);
	result = vdo_validate_header(&VDO_SLAB_DEPOT_HEADER_2_0, &header, true,
				     __func__);
	if (result != VDO_SUCCESS)
		return result;

	/* The seven slab_config fields are consecutive little-endian u64s. */
	initial_offset = *offset;
	decode_u64_le(buffer, offset, &count);
	slab_config.slab_blocks = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.data_blocks = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.reference_count_blocks = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.slab_journal_blocks = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.slab_journal_flushing_threshold = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.slab_journal_blocking_threshold = count;

	decode_u64_le(buffer, offset, &count);
	slab_config.slab_journal_scrubbing_threshold = count;

	decode_u64_le(buffer, offset, &first_block);
	decode_u64_le(buffer, offset, &last_block);
	/* The zone count is a single byte, not a u64. */
	zone_count = buffer[(*offset)++];

	result = VDO_ASSERT(VDO_SLAB_DEPOT_HEADER_2_0.size == *offset - initial_offset,
			    "decoded slab depot component size must match header size");
	if (result != VDO_SUCCESS)
		return result;

	*state = (struct slab_depot_state_2_0) {
		.slab_config = slab_config,
		.first_block = first_block,
		.last_block = last_block,
		.zone_count = zone_count,
	};

	return VDO_SUCCESS;
}
633  
634  /**
635   * vdo_configure_slab_depot() - Configure the slab depot.
636   * @partition: The slab depot partition
637   * @slab_config: The configuration of a single slab.
638   * @zone_count: The number of zones the depot will use.
639   * @state: The state structure to be configured.
640   *
641   * Configures the slab_depot for the specified storage capacity, finding the number of data blocks
642   * that will fit and still leave room for the depot metadata, then return the saved state for that
643   * configuration.
644   *
645   * Return: VDO_SUCCESS or an error code.
646   */
vdo_configure_slab_depot(const struct partition * partition,struct slab_config slab_config,zone_count_t zone_count,struct slab_depot_state_2_0 * state)647  int vdo_configure_slab_depot(const struct partition *partition,
648  			     struct slab_config slab_config, zone_count_t zone_count,
649  			     struct slab_depot_state_2_0 *state)
650  {
651  	block_count_t total_slab_blocks, total_data_blocks;
652  	size_t slab_count;
653  	physical_block_number_t last_block;
654  	block_count_t slab_size = slab_config.slab_blocks;
655  
656  	vdo_log_debug("slabDepot %s(block_count=%llu, first_block=%llu, slab_size=%llu, zone_count=%u)",
657  		      __func__, (unsigned long long) partition->count,
658  		      (unsigned long long) partition->offset,
659  		      (unsigned long long) slab_size, zone_count);
660  
661  	/* We do not allow runt slabs, so we waste up to a slab's worth. */
662  	slab_count = (partition->count / slab_size);
663  	if (slab_count == 0)
664  		return VDO_NO_SPACE;
665  
666  	if (slab_count > MAX_VDO_SLABS)
667  		return VDO_TOO_MANY_SLABS;
668  
669  	total_slab_blocks = slab_count * slab_config.slab_blocks;
670  	total_data_blocks = slab_count * slab_config.data_blocks;
671  	last_block = partition->offset + total_slab_blocks;
672  
673  	*state = (struct slab_depot_state_2_0) {
674  		.slab_config = slab_config,
675  		.first_block = partition->offset,
676  		.last_block = last_block,
677  		.zone_count = zone_count,
678  	};
679  
680  	vdo_log_debug("slab_depot last_block=%llu, total_data_blocks=%llu, slab_count=%zu, left_over=%llu",
681  		      (unsigned long long) last_block,
682  		      (unsigned long long) total_data_blocks, slab_count,
683  		      (unsigned long long) (partition->count - (last_block - partition->offset)));
684  
685  	return VDO_SUCCESS;
686  }
687  
/**
 * vdo_configure_slab() - Measure and initialize the configuration to use for each slab.
 * @slab_size: The number of blocks per slab.
 * @slab_journal_blocks: The number of blocks for the slab journal.
 * @slab_config: The slab configuration to initialize.
 *
 * Return: VDO_SUCCESS or an error code.
 */
int vdo_configure_slab(block_count_t slab_size, block_count_t slab_journal_blocks,
		       struct slab_config *slab_config)
{
	block_count_t ref_blocks, meta_blocks, data_blocks;
	block_count_t flushing_threshold, remaining, blocking_threshold;
	block_count_t minimal_extra_space, scrubbing_threshold;

	/* The journal must leave at least some room for data and metadata. */
	if (slab_journal_blocks >= slab_size)
		return VDO_BAD_CONFIGURATION;

	/*
	 * This calculation should technically be a recurrence, but the total number of metadata
	 * blocks is currently less than a single block of ref_counts, so we'd gain at most one
	 * data block in each slab with more iteration.
	 */
	ref_blocks = vdo_get_saved_reference_count_size(slab_size - slab_journal_blocks);
	meta_blocks = (ref_blocks + slab_journal_blocks);

	/* Make sure test code hasn't configured slabs to be too small. */
	if (meta_blocks >= slab_size)
		return VDO_BAD_CONFIGURATION;

	/*
	 * If the slab size is very small, assume this must be a unit test and override the number
	 * of data blocks to be a power of two (wasting blocks in the slab). Many tests need their
	 * data_blocks fields to be the exact capacity of the configured volume, and that used to
	 * fall out since they use a power of two for the number of data blocks, the slab size was
	 * a power of two, and every block in a slab was a data block.
	 *
	 * TODO: Try to figure out some way of structuring testParameters and unit tests so this
	 * hack isn't needed without having to edit several unit tests every time the metadata size
	 * changes by one block.
	 */
	data_blocks = slab_size - meta_blocks;
	if ((slab_size < 1024) && !is_power_of_2(data_blocks))
		data_blocks = ((block_count_t) 1 << ilog2(data_blocks));

	/*
	 * Configure the slab journal thresholds. The flush threshold is 168 of 224 blocks in
	 * production, or 3/4ths, so we use this ratio for all sizes.
	 */
	flushing_threshold = ((slab_journal_blocks * 3) + 3) / 4;
	/*
	 * The blocking threshold should be far enough from the flushing threshold to not produce
	 * delays, but far enough from the end of the journal to allow multiple successive recovery
	 * failures.
	 */
	remaining = slab_journal_blocks - flushing_threshold;
	blocking_threshold = flushing_threshold + ((remaining * 5) / 7);
	/* The scrubbing threshold should be at least 2048 entries before the end of the journal. */
	minimal_extra_space = 1 + (MAXIMUM_VDO_USER_VIOS / VDO_SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK);
	scrubbing_threshold = blocking_threshold;
	if (slab_journal_blocks > minimal_extra_space)
		scrubbing_threshold = slab_journal_blocks - minimal_extra_space;
	/* Blocking must never be deferred past scrubbing. */
	if (blocking_threshold > scrubbing_threshold)
		blocking_threshold = scrubbing_threshold;

	*slab_config = (struct slab_config) {
		.slab_blocks = slab_size,
		.data_blocks = data_blocks,
		.reference_count_blocks = ref_blocks,
		.slab_journal_blocks = slab_journal_blocks,
		.slab_journal_flushing_threshold = flushing_threshold,
		.slab_journal_blocking_threshold = blocking_threshold,
		.slab_journal_scrubbing_threshold = scrubbing_threshold};
	return VDO_SUCCESS;
}
763  
764  /**
765   * vdo_decode_slab_journal_entry() - Decode a slab journal entry.
766   * @block: The journal block holding the entry.
767   * @entry_count: The number of the entry.
768   *
769   * Return: The decoded entry.
770   */
vdo_decode_slab_journal_entry(struct packed_slab_journal_block * block,journal_entry_count_t entry_count)771  struct slab_journal_entry vdo_decode_slab_journal_entry(struct packed_slab_journal_block *block,
772  							journal_entry_count_t entry_count)
773  {
774  	struct slab_journal_entry entry =
775  		vdo_unpack_slab_journal_entry(&block->payload.entries[entry_count]);
776  
777  	if (block->header.has_block_map_increments &&
778  	    ((block->payload.full_entries.entry_types[entry_count / 8] &
779  	      ((u8) 1 << (entry_count % 8))) != 0))
780  		entry.operation = VDO_JOURNAL_BLOCK_MAP_REMAPPING;
781  
782  	return entry;
783  }
784  
785  /**
786   * allocate_partition() - Allocate a partition and add it to a layout.
787   * @layout: The layout containing the partition.
788   * @id: The id of the partition.
789   * @offset: The offset into the layout at which the partition begins.
790   * @size: The size of the partition in blocks.
791   *
792   * Return: VDO_SUCCESS or an error.
793   */
allocate_partition(struct layout * layout,u8 id,physical_block_number_t offset,block_count_t size)794  static int allocate_partition(struct layout *layout, u8 id,
795  			      physical_block_number_t offset, block_count_t size)
796  {
797  	struct partition *partition;
798  	int result;
799  
800  	result = vdo_allocate(1, struct partition, __func__, &partition);
801  	if (result != VDO_SUCCESS)
802  		return result;
803  
804  	partition->id = id;
805  	partition->offset = offset;
806  	partition->count = size;
807  	partition->next = layout->head;
808  	layout->head = partition;
809  
810  	return VDO_SUCCESS;
811  }
812  
813  /**
814   * make_partition() - Create a new partition from the beginning or end of the unused space in a
815   *                    layout.
816   * @layout: The layout.
817   * @id: The id of the partition to make.
818   * @size: The number of blocks to carve out; if 0, all remaining space will be used.
819   * @beginning: True if the partition should start at the beginning of the unused space.
820   *
821   * Return: A success or error code, particularly VDO_NO_SPACE if there are fewer than size blocks
822   *         remaining.
823   */
make_partition(struct layout * layout,enum partition_id id,block_count_t size,bool beginning)824  static int __must_check make_partition(struct layout *layout, enum partition_id id,
825  				       block_count_t size, bool beginning)
826  {
827  	int result;
828  	physical_block_number_t offset;
829  	block_count_t free_blocks = layout->last_free - layout->first_free;
830  
831  	if (size == 0) {
832  		if (free_blocks == 0)
833  			return VDO_NO_SPACE;
834  		size = free_blocks;
835  	} else if (size > free_blocks) {
836  		return VDO_NO_SPACE;
837  	}
838  
839  	result = vdo_get_partition(layout, id, NULL);
840  	if (result != VDO_UNKNOWN_PARTITION)
841  		return VDO_PARTITION_EXISTS;
842  
843  	offset = beginning ? layout->first_free : (layout->last_free - size);
844  
845  	result = allocate_partition(layout, id, offset, size);
846  	if (result != VDO_SUCCESS)
847  		return result;
848  
849  	layout->num_partitions++;
850  	if (beginning)
851  		layout->first_free += size;
852  	else
853  		layout->last_free = layout->last_free - size;
854  
855  	return VDO_SUCCESS;
856  }
857  
858  /**
859   * vdo_initialize_layout() - Lay out the partitions of a vdo.
860   * @size: The entire size of the vdo.
861   * @origin: The start of the layout on the underlying storage in blocks.
862   * @block_map_blocks: The size of the block map partition.
863   * @journal_blocks: The size of the journal partition.
864   * @summary_blocks: The size of the slab summary partition.
865   * @layout: The layout to initialize.
866   *
867   * Return: VDO_SUCCESS or an error.
868   */
vdo_initialize_layout(block_count_t size,physical_block_number_t offset,block_count_t block_map_blocks,block_count_t journal_blocks,block_count_t summary_blocks,struct layout * layout)869  int vdo_initialize_layout(block_count_t size, physical_block_number_t offset,
870  			  block_count_t block_map_blocks, block_count_t journal_blocks,
871  			  block_count_t summary_blocks, struct layout *layout)
872  {
873  	int result;
874  	block_count_t necessary_size =
875  		(offset + block_map_blocks + journal_blocks + summary_blocks);
876  
877  	if (necessary_size > size)
878  		return vdo_log_error_strerror(VDO_NO_SPACE,
879  					      "Not enough space to make a VDO");
880  
881  	*layout = (struct layout) {
882  		.start = offset,
883  		.size = size,
884  		.first_free = offset,
885  		.last_free = size,
886  		.num_partitions = 0,
887  		.head = NULL,
888  	};
889  
890  	result = make_partition(layout, VDO_BLOCK_MAP_PARTITION, block_map_blocks, true);
891  	if (result != VDO_SUCCESS) {
892  		vdo_uninitialize_layout(layout);
893  		return result;
894  	}
895  
896  	result = make_partition(layout, VDO_SLAB_SUMMARY_PARTITION, summary_blocks,
897  				false);
898  	if (result != VDO_SUCCESS) {
899  		vdo_uninitialize_layout(layout);
900  		return result;
901  	}
902  
903  	result = make_partition(layout, VDO_RECOVERY_JOURNAL_PARTITION, journal_blocks,
904  				false);
905  	if (result != VDO_SUCCESS) {
906  		vdo_uninitialize_layout(layout);
907  		return result;
908  	}
909  
910  	result = make_partition(layout, VDO_SLAB_DEPOT_PARTITION, 0, true);
911  	if (result != VDO_SUCCESS)
912  		vdo_uninitialize_layout(layout);
913  
914  	return result;
915  }
916  
917  /**
918   * vdo_uninitialize_layout() - Clean up a layout.
919   * @layout: The layout to clean up.
920   *
921   * All partitions created by this layout become invalid pointers.
922   */
vdo_uninitialize_layout(struct layout * layout)923  void vdo_uninitialize_layout(struct layout *layout)
924  {
925  	while (layout->head != NULL) {
926  		struct partition *part = layout->head;
927  
928  		layout->head = part->next;
929  		vdo_free(part);
930  	}
931  
932  	memset(layout, 0, sizeof(struct layout));
933  }
934  
935  /**
936   * vdo_get_partition() - Get a partition by id.
937   * @layout: The layout from which to get a partition.
938   * @id: The id of the partition.
939   * @partition_ptr: A pointer to hold the partition.
940   *
941   * Return: VDO_SUCCESS or an error.
942   */
vdo_get_partition(struct layout * layout,enum partition_id id,struct partition ** partition_ptr)943  int vdo_get_partition(struct layout *layout, enum partition_id id,
944  		      struct partition **partition_ptr)
945  {
946  	struct partition *partition;
947  
948  	for (partition = layout->head; partition != NULL; partition = partition->next) {
949  		if (partition->id == id) {
950  			if (partition_ptr != NULL)
951  				*partition_ptr = partition;
952  			return VDO_SUCCESS;
953  		}
954  	}
955  
956  	return VDO_UNKNOWN_PARTITION;
957  }
958  
959  /**
960   * vdo_get_known_partition() - Get a partition by id from a validated layout.
961   * @layout: The layout from which to get a partition.
962   * @id: The id of the partition.
963   *
964   * Return: the partition
965   */
vdo_get_known_partition(struct layout * layout,enum partition_id id)966  struct partition *vdo_get_known_partition(struct layout *layout, enum partition_id id)
967  {
968  	struct partition *partition;
969  	int result = vdo_get_partition(layout, id, &partition);
970  
971  	VDO_ASSERT_LOG_ONLY(result == VDO_SUCCESS, "layout has expected partition: %u", id);
972  
973  	return partition;
974  }
975  
/**
 * encode_layout() - Encode a layout and its partition table into a buffer.
 * @buffer: The buffer to encode into.
 * @offset: The offset in the buffer at which to encode; advanced past the encoded data.
 * @layout: The layout to encode.
 */
static void encode_layout(u8 *buffer, size_t *offset, const struct layout *layout)
{
	const struct partition *partition;
	size_t initial_offset;
	struct header header = VDO_LAYOUT_HEADER_3_0;

	/* Partition ids are written as single bytes, so the enum must be one byte wide. */
	BUILD_BUG_ON(sizeof(enum partition_id) != sizeof(u8));
	VDO_ASSERT_LOG_ONLY(layout->num_partitions <= U8_MAX,
			    "layout partition count must fit in a byte");

	vdo_encode_header(buffer, offset, &header);

	initial_offset = *offset;
	encode_u64_le(buffer, offset, layout->first_free);
	encode_u64_le(buffer, offset, layout->last_free);
	buffer[(*offset)++] = layout->num_partitions;

	VDO_ASSERT_LOG_ONLY(sizeof(struct layout_3_0) == *offset - initial_offset,
			    "encoded size of a layout header must match structure");

	/* Each partition is encoded as: id byte, offset, a zero placeholder, then count. */
	for (partition = layout->head; partition != NULL; partition = partition->next) {
		buffer[(*offset)++] = partition->id;
		encode_u64_le(buffer, offset, partition->offset);
		/* This field only exists for backwards compatibility */
		encode_u64_le(buffer, offset, 0);
		encode_u64_le(buffer, offset, partition->count);
	}

	VDO_ASSERT_LOG_ONLY(header.size == *offset - initial_offset,
			    "encoded size of a layout must match header size");
}
1007  
/**
 * decode_layout() - Decode a layout and its partition table from a buffer.
 * @buffer: The buffer to decode from.
 * @offset: The offset in the buffer at which to decode; advanced past the decoded data.
 * @start: The physical block at which the decoded partitions are expected to begin.
 * @size: The block count which the partitions (starting from @start) must exactly cover.
 * @layout: The layout to populate.
 *
 * On any error after partitions have been allocated, the layout is uninitialized again before
 * returning.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int decode_layout(u8 *buffer, size_t *offset, physical_block_number_t start,
			 block_count_t size, struct layout *layout)
{
	struct header header;
	struct layout_3_0 layout_header;
	struct partition *partition;
	size_t initial_offset;
	physical_block_number_t first_free, last_free;
	u8 partition_count;
	u8 i;
	int result;

	vdo_decode_header(buffer, offset, &header);
	/* Layout is variable size, so only do a minimum size check here. */
	result = vdo_validate_header(&VDO_LAYOUT_HEADER_3_0, &header, false, __func__);
	if (result != VDO_SUCCESS)
		return result;

	initial_offset = *offset;
	decode_u64_le(buffer, offset, &first_free);
	decode_u64_le(buffer, offset, &last_free);
	partition_count = buffer[(*offset)++];
	layout_header = (struct layout_3_0) {
		.first_free = first_free,
		.last_free = last_free,
		.partition_count = partition_count,
	};

	result = VDO_ASSERT(sizeof(struct layout_3_0) == *offset - initial_offset,
			    "decoded size of a layout header must match structure");
	if (result != VDO_SUCCESS)
		return result;

	layout->start = start;
	layout->size = size;
	layout->first_free = layout_header.first_free;
	layout->last_free = layout_header.last_free;
	layout->num_partitions = layout_header.partition_count;

	if (layout->num_partitions > VDO_PARTITION_COUNT) {
		return vdo_log_error_strerror(VDO_UNKNOWN_PARTITION,
					      "layout has extra partitions");
	}

	for (i = 0; i < layout->num_partitions; i++) {
		u8 id;
		u64 partition_offset, count;

		id = buffer[(*offset)++];
		decode_u64_le(buffer, offset, &partition_offset);
		/* Skip over a u64 field which only exists for backwards compatibility. */
		*offset += sizeof(u64);
		decode_u64_le(buffer, offset, &count);

		result = allocate_partition(layout, id, partition_offset, count);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return result;
		}
	}

	/* Validate that the layout has all (and only) the required partitions */
	for (i = 0; i < VDO_PARTITION_COUNT; i++) {
		result = vdo_get_partition(layout, REQUIRED_PARTITIONS[i], &partition);
		if (result != VDO_SUCCESS) {
			vdo_uninitialize_layout(layout);
			return vdo_log_error_strerror(result,
						      "layout is missing required partition %u",
						      REQUIRED_PARTITIONS[i]);
		}

		/* Reuse start as a running tally of the blocks covered by the partitions. */
		start += partition->count;
	}

	if (start != size) {
		vdo_uninitialize_layout(layout);
		return vdo_log_error_strerror(UDS_BAD_STATE,
					      "partitions do not cover the layout");
	}

	return VDO_SUCCESS;
}
1089  
1090  /**
1091   * pack_vdo_config() - Convert a vdo_config to its packed on-disk representation.
1092   * @config: The vdo config to convert.
1093   *
1094   * Return: The platform-independent representation of the config.
1095   */
pack_vdo_config(struct vdo_config config)1096  static struct packed_vdo_config pack_vdo_config(struct vdo_config config)
1097  {
1098  	return (struct packed_vdo_config) {
1099  		.logical_blocks = __cpu_to_le64(config.logical_blocks),
1100  		.physical_blocks = __cpu_to_le64(config.physical_blocks),
1101  		.slab_size = __cpu_to_le64(config.slab_size),
1102  		.recovery_journal_size = __cpu_to_le64(config.recovery_journal_size),
1103  		.slab_journal_blocks = __cpu_to_le64(config.slab_journal_blocks),
1104  	};
1105  }
1106  
1107  /**
1108   * pack_vdo_component() - Convert a vdo_component to its packed on-disk representation.
1109   * @component: The VDO component data to convert.
1110   *
1111   * Return: The platform-independent representation of the component.
1112   */
pack_vdo_component(const struct vdo_component component)1113  static struct packed_vdo_component_41_0 pack_vdo_component(const struct vdo_component component)
1114  {
1115  	return (struct packed_vdo_component_41_0) {
1116  		.state = __cpu_to_le32(component.state),
1117  		.complete_recoveries = __cpu_to_le64(component.complete_recoveries),
1118  		.read_only_recoveries = __cpu_to_le64(component.read_only_recoveries),
1119  		.config = pack_vdo_config(component.config),
1120  		.nonce = __cpu_to_le64(component.nonce),
1121  	};
1122  }
1123  
/**
 * encode_vdo_component() - Encode a vdo component into a buffer.
 * @buffer: The buffer to encode into.
 * @offset: The offset at which to encode; advanced past the encoded data.
 * @component: The component to encode.
 */
static void encode_vdo_component(u8 *buffer, size_t *offset,
				 struct vdo_component component)
{
	struct packed_vdo_component_41_0 packed = pack_vdo_component(component);

	/* The version number precedes the packed component data on disk. */
	encode_version_number(buffer, offset, VDO_COMPONENT_DATA_41_0);
	memcpy(buffer + *offset, &packed, sizeof(packed));
	*offset += sizeof(packed);
}
1134  
1135  /**
1136   * unpack_vdo_config() - Convert a packed_vdo_config to its native in-memory representation.
1137   * @config: The packed vdo config to convert.
1138   *
1139   * Return: The native in-memory representation of the vdo config.
1140   */
unpack_vdo_config(struct packed_vdo_config config)1141  static struct vdo_config unpack_vdo_config(struct packed_vdo_config config)
1142  {
1143  	return (struct vdo_config) {
1144  		.logical_blocks = __le64_to_cpu(config.logical_blocks),
1145  		.physical_blocks = __le64_to_cpu(config.physical_blocks),
1146  		.slab_size = __le64_to_cpu(config.slab_size),
1147  		.recovery_journal_size = __le64_to_cpu(config.recovery_journal_size),
1148  		.slab_journal_blocks = __le64_to_cpu(config.slab_journal_blocks),
1149  	};
1150  }
1151  
1152  /**
1153   * unpack_vdo_component_41_0() - Convert a packed_vdo_component_41_0 to its native in-memory
1154   *				 representation.
1155   * @component: The packed vdo component data to convert.
1156   *
1157   * Return: The native in-memory representation of the component.
1158   */
unpack_vdo_component_41_0(struct packed_vdo_component_41_0 component)1159  static struct vdo_component unpack_vdo_component_41_0(struct packed_vdo_component_41_0 component)
1160  {
1161  	return (struct vdo_component) {
1162  		.state = __le32_to_cpu(component.state),
1163  		.complete_recoveries = __le64_to_cpu(component.complete_recoveries),
1164  		.read_only_recoveries = __le64_to_cpu(component.read_only_recoveries),
1165  		.config = unpack_vdo_config(component.config),
1166  		.nonce = __le64_to_cpu(component.nonce),
1167  	};
1168  }
1169  
1170  /**
1171   * decode_vdo_component() - Decode the component data for the vdo itself out of the super block.
1172   *
1173   * Return: VDO_SUCCESS or an error.
1174   */
decode_vdo_component(u8 * buffer,size_t * offset,struct vdo_component * component)1175  static int decode_vdo_component(u8 *buffer, size_t *offset, struct vdo_component *component)
1176  {
1177  	struct version_number version;
1178  	struct packed_vdo_component_41_0 packed;
1179  	int result;
1180  
1181  	decode_version_number(buffer, offset, &version);
1182  	result = validate_version(version, VDO_COMPONENT_DATA_41_0,
1183  				  "VDO component data");
1184  	if (result != VDO_SUCCESS)
1185  		return result;
1186  
1187  	memcpy(&packed, buffer + *offset, sizeof(packed));
1188  	*offset += sizeof(packed);
1189  	*component = unpack_vdo_component_41_0(packed);
1190  	return VDO_SUCCESS;
1191  }
1192  
/**
 * vdo_validate_config() - Validate constraints on a VDO config.
 * @config: The VDO config.
 * @physical_block_count: The minimum block count of the underlying storage.
 * @logical_block_count: The expected logical size of the VDO, or 0 if the logical size may be
 *			 unspecified.
 *
 * Checks are applied in order; the first failing check determines the return value.
 *
 * Return: A success or error code.
 */
int vdo_validate_config(const struct vdo_config *config,
			block_count_t physical_block_count,
			block_count_t logical_block_count)
{
	struct slab_config slab_config;
	int result;

	/* Slab geometry: a nonzero power-of-two size within the supported bound. */
	result = VDO_ASSERT(config->slab_size > 0, "slab size unspecified");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(is_power_of_2(config->slab_size),
			    "slab size must be a power of two");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->slab_size <= (1 << MAX_VDO_SLAB_BITS),
			    "slab size must be less than or equal to 2^%d",
			    MAX_VDO_SLAB_BITS);
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->slab_journal_blocks >= MINIMUM_VDO_SLAB_JOURNAL_BLOCKS,
			    "slab journal size meets minimum size");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->slab_journal_blocks <= config->slab_size,
			    "slab journal size is within expected bound");
	if (result != VDO_SUCCESS)
		return result;

	/* Derive the full slab configuration to confirm the slab is actually usable. */
	result = vdo_configure_slab(config->slab_size, config->slab_journal_blocks,
				    &slab_config);
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT((slab_config.data_blocks >= 1),
			    "slab must be able to hold at least one block");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(config->physical_blocks > 0, "physical blocks unspecified");
	if (result != VDO_SUCCESS)
		return result;

	/*
	 * NOTE(review): unlike the other checks, this one returns VDO_OUT_OF_RANGE rather than
	 * the assertion result — confirm this difference is intentional.
	 */
	result = VDO_ASSERT(config->physical_blocks <= MAXIMUM_VDO_PHYSICAL_BLOCKS,
			    "physical block count %llu exceeds maximum %llu",
			    (unsigned long long) config->physical_blocks,
			    (unsigned long long) MAXIMUM_VDO_PHYSICAL_BLOCKS);
	if (result != VDO_SUCCESS)
		return VDO_OUT_OF_RANGE;

	/* The configured physical size must match the storage exactly. */
	if (physical_block_count != config->physical_blocks) {
		vdo_log_error("A physical size of %llu blocks was specified, not the %llu blocks configured in the vdo super block",
			      (unsigned long long) physical_block_count,
			      (unsigned long long) config->physical_blocks);
		return VDO_PARAMETER_MISMATCH;
	}

	/* A logical size of 0 means the caller does not care about the logical size. */
	if (logical_block_count > 0) {
		result = VDO_ASSERT((config->logical_blocks > 0),
				    "logical blocks unspecified");
		if (result != VDO_SUCCESS)
			return result;

		if (logical_block_count != config->logical_blocks) {
			vdo_log_error("A logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block",
				      (unsigned long long) logical_block_count,
				      (unsigned long long) config->logical_blocks);
			return VDO_PARAMETER_MISMATCH;
		}
	}

	result = VDO_ASSERT(config->logical_blocks <= MAXIMUM_VDO_LOGICAL_BLOCKS,
			    "logical blocks too large");
	if (result != VDO_SUCCESS)
		return result;

	/* Recovery journal geometry: nonzero and a power of two. */
	result = VDO_ASSERT(config->recovery_journal_size > 0,
			    "recovery journal size unspecified");
	if (result != VDO_SUCCESS)
		return result;

	result = VDO_ASSERT(is_power_of_2(config->recovery_journal_size),
			    "recovery journal size must be a power of two");
	if (result != VDO_SUCCESS)
		return result;

	return result;
}
1293  
1294  /**
1295   * vdo_destroy_component_states() - Clean up any allocations in a vdo_component_states.
1296   * @states: The component states to destroy.
1297   */
vdo_destroy_component_states(struct vdo_component_states * states)1298  void vdo_destroy_component_states(struct vdo_component_states *states)
1299  {
1300  	if (states == NULL)
1301  		return;
1302  
1303  	vdo_uninitialize_layout(&states->layout);
1304  }
1305  
1306  /**
1307   * decode_components() - Decode the components now that we know the component data is a version we
1308   *                       understand.
1309   * @buffer: The buffer being decoded.
1310   * @offset: The offset to start decoding from.
1311   * @geometry: The vdo geometry
1312   * @states: An object to hold the successfully decoded state.
1313   *
1314   * Return: VDO_SUCCESS or an error.
1315   */
decode_components(u8 * buffer,size_t * offset,struct volume_geometry * geometry,struct vdo_component_states * states)1316  static int __must_check decode_components(u8 *buffer, size_t *offset,
1317  					  struct volume_geometry *geometry,
1318  					  struct vdo_component_states *states)
1319  {
1320  	int result;
1321  
1322  	decode_vdo_component(buffer, offset, &states->vdo);
1323  
1324  	result = decode_layout(buffer, offset, vdo_get_data_region_start(*geometry) + 1,
1325  			       states->vdo.config.physical_blocks, &states->layout);
1326  	if (result != VDO_SUCCESS)
1327  		return result;
1328  
1329  	result = decode_recovery_journal_state_7_0(buffer, offset,
1330  						   &states->recovery_journal);
1331  	if (result != VDO_SUCCESS)
1332  		return result;
1333  
1334  	result = decode_slab_depot_state_2_0(buffer, offset, &states->slab_depot);
1335  	if (result != VDO_SUCCESS)
1336  		return result;
1337  
1338  	result = decode_block_map_state_2_0(buffer, offset, &states->block_map);
1339  	if (result != VDO_SUCCESS)
1340  		return result;
1341  
1342  	VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
1343  			    "All decoded component data was used");
1344  	return VDO_SUCCESS;
1345  }
1346  
/**
 * vdo_decode_component_states() - Decode the payload of a super block.
 * @buffer: The buffer containing the encoded super block contents.
 * @geometry: The vdo geometry
 * @states: A pointer to hold the decoded states.
 *
 * Return: VDO_SUCCESS or an error.
 */
int vdo_decode_component_states(u8 *buffer, struct volume_geometry *geometry,
				struct vdo_component_states *states)
{
	int result;
	size_t offset = VDO_COMPONENT_DATA_OFFSET;

	/* This is for backwards compatibility. */
	decode_u32_le(buffer, &offset, &states->unused);

	/* Check the VDO volume version */
	decode_version_number(buffer, &offset, &states->volume_version);
	/*
	 * NOTE(review): the (expected, actual) argument order here is the reverse of the call
	 * in decode_vdo_component() — confirm validate_version() treats them symmetrically.
	 */
	result = validate_version(VDO_VOLUME_VERSION_67_0, states->volume_version,
				  "volume");
	if (result != VDO_SUCCESS)
		return result;

	result = decode_components(buffer, &offset, geometry, states);
	if (result != VDO_SUCCESS)
		vdo_uninitialize_layout(&states->layout);

	return result;
}
1377  
1378  /**
1379   * vdo_validate_component_states() - Validate the decoded super block configuration.
1380   * @states: The state decoded from the super block.
1381   * @geometry_nonce: The nonce from the geometry block.
1382   * @physical_size: The minimum block count of the underlying storage.
1383   * @logical_size: The expected logical size of the VDO, or 0 if the logical size may be
1384   *                unspecified.
1385   *
1386   * Return: VDO_SUCCESS or an error if the configuration is invalid.
1387   */
vdo_validate_component_states(struct vdo_component_states * states,nonce_t geometry_nonce,block_count_t physical_size,block_count_t logical_size)1388  int vdo_validate_component_states(struct vdo_component_states *states,
1389  				  nonce_t geometry_nonce, block_count_t physical_size,
1390  				  block_count_t logical_size)
1391  {
1392  	if (geometry_nonce != states->vdo.nonce) {
1393  		return vdo_log_error_strerror(VDO_BAD_NONCE,
1394  					      "Geometry nonce %llu does not match superblock nonce %llu",
1395  					      (unsigned long long) geometry_nonce,
1396  					      (unsigned long long) states->vdo.nonce);
1397  	}
1398  
1399  	return vdo_validate_config(&states->vdo.config, physical_size, logical_size);
1400  }
1401  
/**
 * vdo_encode_component_states() - Encode the state of all vdo components in the super block.
 * @buffer: The buffer to encode into.
 * @offset: The offset at which to encode; advanced past the encoded data.
 * @states: The component states to encode.
 */
static void vdo_encode_component_states(u8 *buffer, size_t *offset,
					const struct vdo_component_states *states)
{
	/* This is for backwards compatibility. */
	encode_u32_le(buffer, offset, states->unused);
	encode_version_number(buffer, offset, states->volume_version);
	encode_vdo_component(buffer, offset, states->vdo);
	encode_layout(buffer, offset, &states->layout);
	encode_recovery_journal_state_7_0(buffer, offset, states->recovery_journal);
	encode_slab_depot_state_2_0(buffer, offset, states->slab_depot);
	encode_block_map_state_2_0(buffer, offset, states->block_map);

	/* The encoding must consume the component data region exactly. */
	VDO_ASSERT_LOG_ONLY(*offset == VDO_COMPONENT_DATA_OFFSET + VDO_COMPONENT_DATA_SIZE,
			    "All super block component data was encoded");
}
1420  
/**
 * vdo_encode_super_block() - Encode a super block into its on-disk representation.
 * @buffer: The buffer to encode into.
 * @states: The component states to encode as the super block payload.
 */
void vdo_encode_super_block(u8 *buffer, struct vdo_component_states *states)
{
	u32 checksum;
	struct header header = SUPER_BLOCK_HEADER_12_0;
	size_t offset = 0;

	/* The header's size field covers the component data in addition to its base size. */
	header.size += VDO_COMPONENT_DATA_SIZE;
	vdo_encode_header(buffer, &offset, &header);
	vdo_encode_component_states(buffer, &offset, states);

	/* Checksum everything encoded so far, then append the checksum itself. */
	checksum = vdo_crc32(buffer, offset);
	encode_u32_le(buffer, &offset, checksum);

	/*
	 * Even though the buffer is a full block, to avoid the potential corruption from a torn
	 * write, the entire encoding must fit in the first sector.
	 */
	VDO_ASSERT_LOG_ONLY(offset <= VDO_SECTOR_SIZE,
			    "entire superblock must fit in one sector");
}
1444  
/**
 * vdo_decode_super_block() - Decode a super block from its on-disk representation.
 * @buffer: The buffer containing the encoded super block.
 *
 * Only the header and checksum are verified here; the component data itself is decoded
 * separately (see vdo_decode_component_states()).
 *
 * Return: VDO_SUCCESS, or an error if the header is invalid or the checksum does not match.
 */
int vdo_decode_super_block(u8 *buffer)
{
	struct header header;
	int result;
	u32 checksum, saved_checksum;
	size_t offset = 0;

	/* Decode and validate the header. */
	vdo_decode_header(buffer, &offset, &header);
	result = vdo_validate_header(&SUPER_BLOCK_HEADER_12_0, &header, false, __func__);
	if (result != VDO_SUCCESS)
		return result;

	if (header.size > VDO_COMPONENT_DATA_SIZE + sizeof(u32)) {
		/*
		 * We can't check release version or checksum until we know the content size, so we
		 * have to assume a version mismatch on unexpected values.
		 */
		return vdo_log_error_strerror(VDO_UNSUPPORTED_VERSION,
					      "super block contents too large: %zu",
					      header.size);
	}

	/* Skip past the component data for now, to verify the checksum. */
	offset += VDO_COMPONENT_DATA_SIZE;

	/* The checksum covers everything up to, but not including, the stored checksum. */
	checksum = vdo_crc32(buffer, offset);
	decode_u32_le(buffer, &offset, &saved_checksum);

	result = VDO_ASSERT(offset == VDO_SUPER_BLOCK_FIXED_SIZE + VDO_COMPONENT_DATA_SIZE,
			    "must have decoded entire superblock payload");
	if (result != VDO_SUCCESS)
		return result;

	return ((checksum != saved_checksum) ? VDO_CHECKSUM_MISMATCH : VDO_SUCCESS);
}
1484