From 2a70d72730b91905543e5d5e850bc00cf43bc5e6 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Duje=20Mihanovi=C4=87?= <duje.mihanovic@skole.hr>
Date: Sat, 30 Apr 2022 16:35:18 +0200
Subject: [PATCH] Fixes and improvements to FAT32 driver

* Fix issue where multiple clusters would be loaded 0x800 apart rather than
  contiguously.
* Get rid of some global variables, saving space.
* si points to the start LBA address rather than the partition table entry,
  saving a bit of space as we no longer need to use offsets.
* In read_cluster_chain, drop push/popad as they in fact do not
  save the upper 16 bytes of the registers. Instead, just push
  and pops the registers that are used.
---
 boot/x86/fat32-structs.s |  9 ------
 boot/x86/fat32.s         | 61 ++++++++++++++++++++++++----------------
 boot/x86/mbr.s           | 28 ++++++++----------
 boot/x86/vbr-fat32.s     | 13 +++++----
 4 files changed, 55 insertions(+), 56 deletions(-)

diff --git a/boot/x86/fat32-structs.s b/boot/x86/fat32-structs.s
index 23980ba..e72735d 100644
--- a/boot/x86/fat32-structs.s
+++ b/boot/x86/fat32-structs.s
@@ -1,15 +1,6 @@
 ; FAT32 data structures
 ; Because they have to be defined before use
 
-struc part_entry
-	.attrib: resb 1
-	.chs_start: resb 3
-	.type: resb 1
-	.chs_end: resb 3
-	.lba_start: resd 1
-	.lba_end: resd 1
-endstruc
-
 struc dir_entry
 	.name: resb 11
 	.attr: resb 1
diff --git a/boot/x86/fat32.s b/boot/x86/fat32.s
index 31288cb..caed036 100644
--- a/boot/x86/fat32.s
+++ b/boot/x86/fat32.s
@@ -1,12 +1,20 @@
 ; FAT32 driver
-; BOOT_DRIVE, part_table_entry and bp must be set up by the code using this driver.
-; first_data_sec is to be set up by calling get_1st_data_sec.
+; BOOT_DRIVE, si and bp must be set up by the code using this driver.
+; (Specifically, si must be a pointer to the partition's partition
+; table entry and bp must be a pointer to the BPB.)
+; Before reading any cluster chains, callers must call get_1st_data_sec
+; and must be careful not to trash ecx after calling it.
 
 ; eax - start cluster number
 ; es:di - where to load the cluster chain
 ; NOTE: Cluster chain might be the root directory or a file.
 read_cluster_chain:
-	pusha
+	push eax
+	push ebx
+	push ecx
+	push edx
+	push es
+	push di
 .loop:
 	; get the first sector of the cluster to read
 	push eax
@@ -14,13 +22,15 @@ read_cluster_chain:
 	xor ebx, ebx
 	mov bl, BPB_SecPerClus
 	mul ebx
-	add eax, [first_data_sec]
+	add eax, ecx
 	mov ebx, eax
 	pop eax
 
 	; read the cluster
+	push cx
 	movzx cx, BPB_SecPerClus
 	call read_sectors
+	pop cx
 
 	; increment the load offset (and segment if needed)
 	push eax
@@ -36,22 +46,21 @@ read_cluster_chain:
 .get_next_cluster:
 	pop eax
 	; get FAT sector number and offset containing the next cluster number
-	xor edx, edx
-	mov ebx, 4
-	mul dword ebx
+	xor ebx, ebx
+	mov bl, 4 ; put 4 in EBX, doing this instead of "mov ebx, 4" saves us an entire byte
+	mul ebx
 	movzx ebx, word BPB_BytsPerSec
-	div dword ebx
+	div ebx
 	movzx ebx, word BPB_RsvdSecCnt
 	add eax, ebx
-	push bp
-	mov bp, [part_table_entry]
-	add eax, [bp+part_entry.lba_start]
-	pop bp
+	add eax, [si]
 	mov ebx, edx
 	; reminder for myself: EAX is FAT sector number, (E)BX is offset into FAT sector
 	
 	; load the FAT sector we're looking for
 
+	push cx
+	push di
 	push ebx ; offset
 	push es ; we want to read at 0:1000, not STAGE3_SEGMENT:1000
 	push eax ; desired LBA
@@ -65,15 +74,20 @@ read_cluster_chain:
 	; find the cluster we're looking for
 	pop es ; restore STAGE3_SEGMENT
 	pop ebx ; pop FAT offset back into EBX for cmp
-	cmp dword [di+bx], 0xffffff7
-	jge .done ; if cluster number is greater than or equal to the defective cluster value, we're done
-		  ; TODO: should this perhaps be changed so that equal makes it error out?
-	; otherwise, load the next sector number in eax and read again
 	mov eax, [di+bx]
-	jmp .loop
+	pop di
+	pop cx
+	cmp eax, 0xffffff7
+	jl .loop ; if cluster number is lower than the defective cluster value, we're not done
+		 ; TODO: perhaps check is it equal defective and error out in that case?
 .done:
 	; cleanup and return
-	popa
+	pop di
+	pop es
+	pop edx
+	pop ecx
+	pop ebx
+	pop eax
 	ret
 
 ; es:di - where to load the sector(s)
@@ -101,18 +115,17 @@ read_sectors:
 		hlt
 		jmp $-1
 
-; no arguments
+; ds:si - pointer to partition table entry
+; RETURN: ecx - first data sector
 get_1st_data_sec:
 	push eax
 	push ebx
-	xor eax, eax
 	movzx ax, BPB_NumFats 
 	mul dword BPB_FatSz32 ; space occupied by all FATs
 	movzx ebx, word BPB_RsvdSecCnt
 	add eax, ebx ; space occupied by the reserved area
-	mov bx, [part_table_entry]
-	add eax, [bx+part_entry.lba_start] ; space before the partition
-	mov [first_data_sec], eax
+	add eax, [si] ; space before the partition
+	mov ecx, eax
 	pop ebx
 	pop eax
 	ret
@@ -120,5 +133,3 @@ get_1st_data_sec:
 read_error: db "Read error", 0
 
 BOOT_DRIVE: db 0
-part_table_entry: dw 0
-first_data_sec: dd 0
diff --git a/boot/x86/mbr.s b/boot/x86/mbr.s
index 8089b2f..613b248 100644
--- a/boot/x86/mbr.s
+++ b/boot/x86/mbr.s
@@ -5,7 +5,7 @@ cpu 686
 org 0x600
 
 _start:
-	cli
+	cli ; we really don't want to be interrupted during relocation
 	; set up segment registers
 	xor ax, ax
 	mov ds, ax
@@ -27,55 +27,51 @@ real_start:
 	call check_int13_ext
 	mov byte [BOOT_DRIVE], dl
 	; look for active partition
-	mov bx, part_1
+	mov si, part_1
 	mov cx, 4
 	.check_part_loop:
-		mov al, [bx]
+		mov al, [si]
 		test al, 0x80
 		jnz .part_found
-		add bx, 16
+		add si, 16
 		loop .check_part_loop
 		.error:
 			mov si, no_os
 			call print
-			cli
 			hlt
 			jmp short $-1
 	.part_found:
 		; look for any other active partitions, if they exist the partition table is invalid
 		cmp cx, 0
 		je .load_vbr
-		push bx
+		push si 
 	.look_other_loop:
-		add bx, 16
-		mov al, [bx]
+		add si, 16
+		mov al, [si]
 		test al, 0x80
 		jnz .invalid_mbr
 		loop .look_other_loop
 	.load_vbr:
 		; load active partition's VBR
-		pop bx
+		pop si
 		mov ax, 0x7c0
-		mov edx, [bx+0x8]
-		push bx ; save pointer to partition table entry
+		add si, 8
+		mov edx, [si]
 		xor bx, bx
 		call read_sector
 		; check is the VBR bootable (ends with 0x55 0xaa), if not halt
 		cmp word [0x7dfe], 0xaa55
 		jne .not_bootable
 		mov dl, [BOOT_DRIVE]
-		pop si ; hand off partition table entry to VBR
-		jmp 0x7c00
+		call 0x7c00
 		.not_bootable:
 			mov si, no_os
 			call print
-			cli
 			hlt
 			jmp short $-1
 	.invalid_mbr:
 		mov si, invalid_mbr
 		call print
-		cli
 		hlt
 		jmp short $-1
 check_int13_ext:
@@ -92,7 +88,6 @@ check_int13_ext:
 	.no_ext:
 		mov si, no_int13_ext
 		call print
-		cli
 		hlt
 		jmp short $-1
 
@@ -119,7 +114,6 @@ read_sector:
 	.error:
 		mov si, read_fail
 		call print
-		cli
 		hlt
 		jmp short $-1
 
diff --git a/boot/x86/vbr-fat32.s b/boot/x86/vbr-fat32.s
index 061ca52..b4c7148 100644
--- a/boot/x86/vbr-fat32.s
+++ b/boot/x86/vbr-fat32.s
@@ -24,7 +24,6 @@ real_start:
 
 	; we expect the boot drive to be in DL and our partition table entry in DS:SI
 	mov [BOOT_DRIVE], dl
-	mov [part_table_entry], si
 
 	; calculate the 1st sector of the data area
 	call get_1st_data_sec
@@ -37,7 +36,9 @@ real_start:
 	mov eax, BPB_RootClus
 	call read_cluster_chain
 
+	push cx
 	mov cx, 11
+	push si
 .find_stage_3:
 	mov si, STAGE3_NAME
 	cmp byte [es:di], 0 ; we have no more entries to look at
@@ -53,11 +54,14 @@ real_start:
 	add bx, 0x1000
 	mov es, bx
 .stage3_found:
-	sub di, 0xb ; "repe cmpsb" incremented this
+	pop si
+	pop cx
+	add di, 9 ; knowing that cmpsb incremented this by 11, we can also increment it by 9
+		       ; right here so we can use 1 less offset below
 	; stage 3 has been found and ES:DI points to its directory entry
-	mov ax, [es:di+dir_entry.firstclushi] ; load high half of the 1st cluster
+	mov ax, [es:di] ; load high half of the 1st cluster
 	shl eax, 16
-	mov ax, [es:di+dir_entry.firstcluslo] ; load low half of the 1st cluster
+	mov ax, [es:di+(dir_entry.firstcluslo-dir_entry.firstclushi)] ; load low half of the 1st cluster
 	mov bx, STAGE3_SEGMENT
 	mov es, bx
 	mov di, STAGE3_OFFSET
@@ -70,7 +74,6 @@ real_start:
 	mov si, stage3_missing
 	call print
 .halt:
-	cli
 	hlt
 	jmp short $-1
 
-- 
2.39.5