blob: 67f1ddd08451f8822f7c4e337f71adfb60f57b15 [file] [log] [blame]
bigbiff7b4c7a62015-01-01 19:44:14 -05001
2#ifdef HAVE_LIBBLKID
3#include <blkid.h>
4#endif
5#include "blkdev.h"
6
7#include "fdiskP.h"
8
9/**
10 * SECTION: alignment
11 * @title: Alignment
12 * @short_description: functions to align partitions and work with disk topology and geometry
13 *
14 * The libfdisk aligns the end of the partitions to make it possible to align
15 * the next partition to the "grain" (see fdisk_get_grain()). The grain is
16 * usually 1MiB (or more for devices where optimal I/O is greater than 1MiB).
17 *
18 * It means that the library does not align strictly to physical sector size
19 * (or minimal or optimal I/O), but it uses greater granularity. It makes
20 * partition tables more portable. If you copy disk layout from 512-sector to
21 * 4K-sector device, all partitions are still aligned to physical sectors.
22 *
23 * This unified concept also makes partition tables more user friendly, all
24 * tables look same, LBA of the first partition is 2048 sectors everywhere, etc.
25 *
26 * It's recommended to not change any alignment or device properties. All is
27 * initialized by default by fdisk_assign_device().
28 *
29 * Note that terminology used by libfdisk is:
30 * - device properties: I/O limits (topology), geometry, sector size, ...
31 * - alignment: first, last LBA, grain, ...
32 *
33 * The alignment setting may be modified by disk label driver.
34 */
35
36/*
37 * Alignment according to logical granularity (usually 1MiB)
38 */
39static int lba_is_aligned(struct fdisk_context *cxt, fdisk_sector_t lba)
40{
41 unsigned long granularity = max(cxt->phy_sector_size, cxt->min_io_size);
42 uintmax_t offset;
43
44 if (cxt->grain > granularity)
45 granularity = cxt->grain;
46 offset = (lba * cxt->sector_size) & (granularity - 1);
47
48 return !((granularity + cxt->alignment_offset - offset) & (granularity - 1));
49}
50
51/*
52 * Alignment according to physical device topology (usually minimal i/o size)
53 */
54static int lba_is_phy_aligned(struct fdisk_context *cxt, fdisk_sector_t lba)
55{
56 unsigned long granularity = max(cxt->phy_sector_size, cxt->min_io_size);
57 uintmax_t offset = (lba * cxt->sector_size) & (granularity - 1);
58
59 return !((granularity + cxt->alignment_offset - offset) & (granularity - 1));
60}
61
62/**
63 * fdisk_align_lba:
64 * @cxt: context
65 * @lba: address to align
66 * @direction: FDISK_ALIGN_{UP,DOWN,NEAREST}
67 *
68 * This function aligns @lba to the "grain" (see fdisk_get_grain()). If the
69 * device uses alignment offset then the result is moved according the offset
70 * to be on the physical boundary.
71 *
72 * Returns: alignment LBA.
73 */
74fdisk_sector_t fdisk_align_lba(struct fdisk_context *cxt, fdisk_sector_t lba, int direction)
75{
76 fdisk_sector_t res;
77
78 if (lba_is_aligned(cxt, lba))
79 res = lba;
80 else {
81 fdisk_sector_t sects_in_phy = cxt->grain / cxt->sector_size;
82
83 if (lba < cxt->first_lba)
84 res = cxt->first_lba;
85
86 else if (direction == FDISK_ALIGN_UP)
87 res = ((lba + sects_in_phy) / sects_in_phy) * sects_in_phy;
88
89 else if (direction == FDISK_ALIGN_DOWN)
90 res = (lba / sects_in_phy) * sects_in_phy;
91
92 else /* FDISK_ALIGN_NEAREST */
93 res = ((lba + sects_in_phy / 2) / sects_in_phy) * sects_in_phy;
94
95 if (cxt->alignment_offset && !lba_is_aligned(cxt, res) &&
96 res > cxt->alignment_offset / cxt->sector_size) {
97 /*
98 * apply alignment_offset
99 *
100 * On disk with alignment compensation physical blocks starts
101 * at LBA < 0 (usually LBA -1). It means we have to move LBA
102 * according the offset to be on the physical boundary.
103 */
104 /* fprintf(stderr, "LBA: %llu apply alignment_offset\n", res); */
105 res -= (max(cxt->phy_sector_size, cxt->min_io_size) -
106 cxt->alignment_offset) / cxt->sector_size;
107
108 if (direction == FDISK_ALIGN_UP && res < lba)
109 res += sects_in_phy;
110 }
111 }
112
113 if (lba != res)
114 DBG(CXT, ul_debugobj(cxt, "LBA %ju -aligned-to-> %ju",
115 (uintmax_t) lba,
116 (uintmax_t) res));
117 return res;
118}
119
120/**
121 * fdisk_align_lba_in_range:
122 * @cxt: context
123 * @lba: LBA
124 * @start: range start
125 * @stop: range stop
126 *
127 * Align @lba, the result has to be between @start and @stop
128 *
129 * Returns: aligned LBA
130 */
131fdisk_sector_t fdisk_align_lba_in_range(struct fdisk_context *cxt,
132 fdisk_sector_t lba, fdisk_sector_t start, fdisk_sector_t stop)
133{
134 fdisk_sector_t res;
135
136 start = fdisk_align_lba(cxt, start, FDISK_ALIGN_UP);
137 stop = fdisk_align_lba(cxt, stop, FDISK_ALIGN_DOWN);
138 lba = fdisk_align_lba(cxt, lba, FDISK_ALIGN_NEAREST);
139
140 if (lba < start)
141 res = start;
142 else if (lba > stop)
143 res = stop;
144 else
145 res = lba;
146
147 DBG(CXT, ul_debugobj(cxt, "LBA %ju range:<%ju..%ju>, result: %ju",
148 (uintmax_t) lba,
149 (uintmax_t) start,
150 (uintmax_t) stop,
151 (uintmax_t) res));
152 return res;
153}
154
155/**
156 * fdisk_lba_is_phy_aligned:
157 * @cxt: context
158 * @lba: LBA to check
159 *
160 * Check if the @lba is aligned to physical sector boundary.
161 *
162 * Returns: 1 if aligned.
163 */
164int fdisk_lba_is_phy_aligned(struct fdisk_context *cxt, fdisk_sector_t lba)
165{
166 return lba_is_phy_aligned(cxt, lba);
167}
168
169static unsigned long get_sector_size(int fd)
170{
171 int sect_sz;
172
173 if (!blkdev_get_sector_size(fd, &sect_sz))
174 return (unsigned long) sect_sz;
175 return DEFAULT_SECTOR_SIZE;
176}
177
178static void recount_geometry(struct fdisk_context *cxt)
179{
180 if (!cxt->geom.heads)
181 cxt->geom.heads = 255;
182 if (!cxt->geom.sectors)
183 cxt->geom.sectors = 63;
184
185 cxt->geom.cylinders = cxt->total_sectors /
186 (cxt->geom.heads * cxt->geom.sectors);
187}
188
189/**
190 * fdisk_override_geometry:
191 * @cxt: fdisk context
192 * @cylinders: user specified cylinders
193 * @heads: user specified heads
194 * @sectors: user specified sectors
195 *
196 * Overrides auto-discovery. The function fdisk_reset_device_properties()
197 * restores the original setting.
198 *
199 * The difference between fdisk_override_geometry() and fdisk_save_user_geometry()
200 * is that saved user geometry is persistent setting and it's applied always
201 * when device is assigned to the context or device properties are reseted.
202 *
203 * Returns: 0 on success, < 0 on error.
204 */
205int fdisk_override_geometry(struct fdisk_context *cxt,
206 unsigned int cylinders,
207 unsigned int heads,
208 unsigned int sectors)
209{
210 if (!cxt)
211 return -EINVAL;
212 if (heads)
213 cxt->geom.heads = heads;
214 if (sectors)
215 cxt->geom.sectors = sectors;
216
217 if (cylinders)
218 cxt->geom.cylinders = cylinders;
219 else
220 recount_geometry(cxt);
221
222 fdisk_reset_alignment(cxt);
223
224 DBG(CXT, ul_debugobj(cxt, "override C/H/S: %u/%u/%u",
225 (unsigned) cxt->geom.cylinders,
226 (unsigned) cxt->geom.heads,
227 (unsigned) cxt->geom.sectors));
228
229 return 0;
230}
231
232/**
233 * fdisk_save_user_geometry:
234 * @cxt: context
235 * @cylinders: C
236 * @heads: H
237 * @sectors: S
238 *
239 * Save user defined geometry to use it for partitioning.
240 *
241 * The user properties are applied by fdisk_assign_device() or
242 * fdisk_reset_device_properties().
243
244 * Returns: <0 on error, 0 on success.
245 */
246int fdisk_save_user_geometry(struct fdisk_context *cxt,
247 unsigned int cylinders,
248 unsigned int heads,
249 unsigned int sectors)
250{
251 if (!cxt)
252 return -EINVAL;
253
254 if (heads)
255 cxt->user_geom.heads = heads > 256 ? 0 : heads;
256 if (sectors)
257 cxt->user_geom.sectors = sectors >= 64 ? 0 : sectors;
258 if (cylinders)
259 cxt->user_geom.cylinders = cylinders;
260
261 DBG(CXT, ul_debugobj(cxt, "user C/H/S: %u/%u/%u",
262 (unsigned) cxt->user_geom.cylinders,
263 (unsigned) cxt->user_geom.heads,
264 (unsigned) cxt->user_geom.sectors));
265
266 return 0;
267}
268
269/**
270 * fdisk_save_user_sector_size:
271 * @cxt: context
272 * @phy: physical sector size
273 * @log: logicla sector size
274 *
275 * Save user defined sector sizes to use it for partitioning.
276 *
277 * The user properties are applied by fdisk_assign_device() or
278 * fdisk_reset_device_properties().
279 *
280 * Returns: <0 on error, 0 on success.
281 */
282int fdisk_save_user_sector_size(struct fdisk_context *cxt,
283 unsigned int phy,
284 unsigned int log)
285{
286 if (!cxt)
287 return -EINVAL;
288
289 DBG(CXT, ul_debugobj(cxt, "user phy/log sector size: %u/%u", phy, log));
290
291 cxt->user_pyh_sector = phy;
292 cxt->user_log_sector = log;
293
294 return 0;
295}
296
297/**
298 * fdisk_has_user_device_properties:
299 * @cxt: context
300 *
301 * Returns: 1 if user specified any properties
302 */
303int fdisk_has_user_device_properties(struct fdisk_context *cxt)
304{
305 return (cxt->user_pyh_sector
306 || cxt->user_log_sector
307 || cxt->user_geom.heads
308 || cxt->user_geom.sectors
309 || cxt->user_geom.cylinders);
310}
311
312int fdisk_apply_user_device_properties(struct fdisk_context *cxt)
313{
314 if (!cxt)
315 return -EINVAL;
316
317 DBG(CXT, ul_debugobj(cxt, "appling user device properties"));
318
319 if (cxt->user_pyh_sector)
320 cxt->phy_sector_size = cxt->user_pyh_sector;
321 if (cxt->user_log_sector)
322 cxt->sector_size = cxt->min_io_size =
323 cxt->io_size = cxt->user_log_sector;
324
325 if (cxt->user_geom.heads)
326 cxt->geom.heads = cxt->user_geom.heads;
327 if (cxt->user_geom.sectors)
328 cxt->geom.sectors = cxt->user_geom.sectors;
329
330 if (cxt->user_geom.cylinders)
331 cxt->geom.cylinders = cxt->user_geom.cylinders;
332 else if (cxt->user_geom.heads || cxt->user_geom.sectors)
333 recount_geometry(cxt);
334
335 fdisk_reset_alignment(cxt);
336 if (cxt->firstsector_bufsz != cxt->sector_size)
337 fdisk_read_firstsector(cxt);
338
339 DBG(CXT, ul_debugobj(cxt, "new C/H/S: %u/%u/%u",
340 (unsigned) cxt->geom.cylinders,
341 (unsigned) cxt->geom.heads,
342 (unsigned) cxt->geom.sectors));
343 DBG(CXT, ul_debugobj(cxt, "new log/phy sector size: %u/%u",
344 (unsigned) cxt->sector_size,
345 (unsigned) cxt->phy_sector_size));
346
347 return 0;
348}
349
350void fdisk_zeroize_device_properties(struct fdisk_context *cxt)
351{
352 assert(cxt);
353
354 cxt->io_size = 0;
355 cxt->optimal_io_size = 0;
356 cxt->min_io_size = 0;
357 cxt->phy_sector_size = 0;
358 cxt->sector_size = 0;
359 cxt->alignment_offset = 0;
360 cxt->grain = 0;
361 cxt->first_lba = 0;
362 cxt->last_lba = 0;
363 cxt->total_sectors = 0;
364
365 memset(&cxt->geom, 0, sizeof(struct fdisk_geometry));
366}
367
368/**
369 * fdisk_reset_device_properties:
370 * @cxt: context
371 *
372 * Resets and discovery topology (I/O limits), geometry, re-read the first
373 * rector on the device if necessary and apply user device setting (geometry
374 * and sector size), then initialize alignment according to label driver (see
375 * fdisk_reset_alignment()).
376 *
377 * You don't have to use this function by default, fdisk_assign_device() is
378 * smart enough to initialize all necessary setting.
379 *
380 * Returns: 0 on success, <0 on error.
381 */
382int fdisk_reset_device_properties(struct fdisk_context *cxt)
383{
384 int rc;
385
386 if (!cxt)
387 return -EINVAL;
388
389 DBG(CXT, ul_debugobj(cxt, "*** reseting device properties"));
390
391 fdisk_zeroize_device_properties(cxt);
392 fdisk_discover_topology(cxt);
393 fdisk_discover_geometry(cxt);
394
395 rc = fdisk_read_firstsector(cxt);
396 if (rc)
397 return rc;
398
399 fdisk_apply_user_device_properties(cxt);
400 return 0;
401}
402
403/*
404 * Generic (label independent) geometry
405 */
406int fdisk_discover_geometry(struct fdisk_context *cxt)
407{
408 fdisk_sector_t nsects;
409
410 assert(cxt);
411 assert(cxt->geom.heads == 0);
412
413 DBG(CXT, ul_debugobj(cxt, "%s: discovering geometry...", cxt->dev_path));
414
415 /* get number of 512-byte sectors, and convert it the real sectors */
416 if (!blkdev_get_sectors(cxt->dev_fd, (unsigned long long *) &nsects))
417 cxt->total_sectors = (nsects / (cxt->sector_size >> 9));
418
419 DBG(CXT, ul_debugobj(cxt, "total sectors: %ju (ioctl=%ju)",
420 (uintmax_t) cxt->total_sectors,
421 (uintmax_t) nsects));
422
423 /* what the kernel/bios thinks the geometry is */
424 blkdev_get_geometry(cxt->dev_fd, &cxt->geom.heads, (unsigned int *) &cxt->geom.sectors);
425
426 /* obtained heads and sectors */
427 recount_geometry(cxt);
428
429 DBG(CXT, ul_debugobj(cxt, "result: C/H/S: %u/%u/%u",
430 (unsigned) cxt->geom.cylinders,
431 (unsigned) cxt->geom.heads,
432 (unsigned) cxt->geom.sectors));
433 return 0;
434}
435
436int fdisk_discover_topology(struct fdisk_context *cxt)
437{
438#ifdef HAVE_LIBBLKID
439 blkid_probe pr;
440#endif
441 assert(cxt);
442 assert(cxt->sector_size == 0);
443
444 DBG(CXT, ul_debugobj(cxt, "%s: discovering topology...", cxt->dev_path));
445#ifdef HAVE_LIBBLKID
446 DBG(CXT, ul_debugobj(cxt, "initialize libblkid prober"));
447
448 pr = blkid_new_probe();
449 if (pr && blkid_probe_set_device(pr, cxt->dev_fd, 0, 0) == 0) {
450 blkid_topology tp = blkid_probe_get_topology(pr);
451
452 if (tp) {
453 cxt->min_io_size = blkid_topology_get_minimum_io_size(tp);
454 cxt->optimal_io_size = blkid_topology_get_optimal_io_size(tp);
455 cxt->phy_sector_size = blkid_topology_get_physical_sector_size(tp);
456 cxt->alignment_offset = blkid_topology_get_alignment_offset(tp);
457
458 /* I/O size used by fdisk */
459 cxt->io_size = cxt->optimal_io_size;
460 if (!cxt->io_size)
461 /* optimal IO is optional, default to minimum IO */
462 cxt->io_size = cxt->min_io_size;
463 }
464 }
465 blkid_free_probe(pr);
466#endif
467
468 cxt->sector_size = get_sector_size(cxt->dev_fd);
469 if (!cxt->phy_sector_size) /* could not discover physical size */
470 cxt->phy_sector_size = cxt->sector_size;
471
472 /* no blkid or error, use default values */
473 if (!cxt->min_io_size)
474 cxt->min_io_size = cxt->sector_size;
475 if (!cxt->io_size)
476 cxt->io_size = cxt->sector_size;
477
478 DBG(CXT, ul_debugobj(cxt, "result: log/phy sector size: %ld/%ld",
479 cxt->sector_size, cxt->phy_sector_size));
480 DBG(CXT, ul_debugobj(cxt, "result: fdisk/min/optimal io: %ld/%ld/%ld",
481 cxt->io_size, cxt->optimal_io_size, cxt->min_io_size));
482 return 0;
483}
484
485static int has_topology(struct fdisk_context *cxt)
486{
487 /*
488 * Assume that the device provides topology info if
489 * optimal_io_size is set or alignment_offset is set or
490 * minimum_io_size is not power of 2.
491 */
492 if (cxt &&
493 (cxt->optimal_io_size ||
494 cxt->alignment_offset ||
495 !is_power_of_2(cxt->min_io_size)))
496 return 1;
497 return 0;
498}
499
500/*
501 * The LBA of the first partition is based on the device geometry and topology.
502 * This offset is generic (and recommended) for all labels.
503 *
504 * Returns: 0 on error or number of logical sectors.
505 */
506static fdisk_sector_t topology_get_first_lba(struct fdisk_context *cxt)
507{
508 fdisk_sector_t x = 0, res;
509
510 if (!cxt)
511 return 0;
512
513 if (!cxt->io_size)
514 fdisk_discover_topology(cxt);
515
516 /*
517 * Align the begin of partitions to:
518 *
519 * a) topology
520 * a2) alignment offset
521 * a1) or physical sector (minimal_io_size, aka "grain")
522 *
523 * b) or default to 1MiB (2048 sectrors, Windows Vista default)
524 *
525 * c) or for very small devices use 1 phy.sector
526 */
527 if (has_topology(cxt)) {
528 if (cxt->alignment_offset)
529 x = cxt->alignment_offset;
530 else if (cxt->io_size > 2048 * 512)
531 x = cxt->io_size;
532 }
533 /* default to 1MiB */
534 if (!x)
535 x = 2048 * 512;
536
537 res = x / cxt->sector_size;
538
539 /* don't use huge offset on small devices */
540 if (cxt->total_sectors <= res * 4)
541 res = cxt->phy_sector_size / cxt->sector_size;
542
543 return res;
544}
545
546static unsigned long topology_get_grain(struct fdisk_context *cxt)
547{
548 unsigned long res;
549
550 if (!cxt)
551 return 0;
552
553 if (!cxt->io_size)
554 fdisk_discover_topology(cxt);
555
556 res = cxt->io_size;
557
558 /* use 1MiB grain always when possible */
559 if (res < 2048 * 512)
560 res = 2048 * 512;
561
562 /* don't use huge grain on small devices */
563 if (cxt->total_sectors <= (res * 4 / cxt->sector_size))
564 res = cxt->phy_sector_size;
565
566 return res;
567}
568
569/**
570 * fdisk_reset_alignment:
571 * @cxt: fdisk context
572 *
573 * Resets alignment setting to the default and label specific values. This
574 * function does not change device properties (I/O limits, geometry etc.).
575 *
576 * Returns: 0 on success, < 0 in case of error.
577 */
578int fdisk_reset_alignment(struct fdisk_context *cxt)
579{
580 int rc = 0;
581
582 if (!cxt)
583 return -EINVAL;
584
585 DBG(CXT, ul_debugobj(cxt, "reseting alignment..."));
586
587 /* default */
588 cxt->grain = topology_get_grain(cxt);
589 cxt->first_lba = topology_get_first_lba(cxt);
590 cxt->last_lba = cxt->total_sectors - 1;
591
592 /* overwrite default by label stuff */
593 if (cxt->label && cxt->label->op->reset_alignment)
594 rc = cxt->label->op->reset_alignment(cxt);
595
596 DBG(CXT, ul_debugobj(cxt, "alignment reseted to: "
597 "first LBA=%ju, last LBA=%ju, grain=%lu [rc=%d]",
598 (uintmax_t) cxt->first_lba, (uintmax_t) cxt->last_lba,
599 cxt->grain, rc));
600 return rc;
601}
602
603
604fdisk_sector_t fdisk_scround(struct fdisk_context *cxt, fdisk_sector_t num)
605{
606 fdisk_sector_t un = fdisk_get_units_per_sector(cxt);
607 return (num + un - 1) / un;
608}
609
610fdisk_sector_t fdisk_cround(struct fdisk_context *cxt, fdisk_sector_t num)
611{
612 return fdisk_use_cylinders(cxt) ?
613 (num / fdisk_get_units_per_sector(cxt)) + 1 : num;
614}
615
616/**
617 * fdisk_reread_partition_table:
618 * @cxt: context
619 *
620 * Force *kernel* to re-read partition table on block devices.
621 *
622 * Returns: 0 on success, < 0 in case of error.
623 */
624int fdisk_reread_partition_table(struct fdisk_context *cxt)
625{
626 int i;
627 struct stat statbuf;
628
629 assert(cxt);
630 assert(cxt->dev_fd >= 0);
631
632 i = fstat(cxt->dev_fd, &statbuf);
633 if (i == 0 && S_ISBLK(statbuf.st_mode)) {
634 sync();
635#ifdef BLKRRPART
636 fdisk_info(cxt, _("Calling ioctl() to re-read partition table."));
637 i = ioctl(cxt->dev_fd, BLKRRPART);
638#else
639 errno = ENOSYS;
640 i = 1;
641#endif
642 }
643
644 if (i) {
645 fdisk_warn(cxt, _("Re-reading the partition table failed."));
646 fdisk_info(cxt, _(
647 "The kernel still uses the old table. The "
648 "new table will be used at the next reboot "
649 "or after you run partprobe(8) or kpartx(8)."));
650 return -errno;
651 }
652
653 return 0;
654}