; This program is made by Daniel Horchner. Based on Thomas Pytel's START32 /
; Adam Seychell's DOS32 v1.2 (Who was first?)
; Read RAW32.TXT!
; email: dbjh@gmx.net

%define NO_EXTERN       0       ; 'extern' directives must not be included
%include "raw32.inc"
extern  main
global  DPMImem,VCPImem,XMSmem  ; Made public for debugging purposes

LOWMIN          equ     0       ; minimum conventional memory needed (in KB)
HIGHMIN         equ     0       ; minimum extended memory needed (in KB)
RM_STACKSIZE    equ     100h    ; real mode (interrupt) stack size (in bytes)
PM_STACKSIZE    equ     200h    ; protected mode stack size (in bytes)
IRQ_STACKSIZE   equ     80h
MASTER_BASE     equ     68h     ; master PIC base interrupt
SLAVE_BASE      equ     70h     ; slave PIC base interrupt

; Check if there is no conflict between PIC bases and the place of
; RMCALL_VECT in the IDT and if there are N_INT gates free above RMCALL_VECT.
%if     MASTER_BASE <= RMCALL_VECT+N_INT && MASTER_BASE >= RMCALL_VECT-7
%error  "Conflict with master PIC mapping."
%endif                                   

%if     SLAVE_BASE <= RMCALL_VECT+N_INT && SLAVE_BASE >= RMCALL_VECT-7
%error  "Conflict with slave PIC mapping."
%endif

%if     SLAVE_BASE > MASTER_BASE
%define HIGHEST_INT     SLAVE_BASE+7
%else
%define HIGHEST_INT     MASTER_BASE+7
%endif
%if     RMCALL_VECT+N_INT > HIGHEST_INT
%assign HIGHEST_INT     RMCALL_VECT+N_INT
%endif

%define WINFRIENDLY     0       ; If defined -> Allocate only half of the
                                ;  possible DPMI mem under Windows
%define SWAT            0       ; If defined -> support for 386SWAT debugger

%macro  set8259vectors 2; Set new IRQ vector numbers
        mov al,15h      ; ICW1: bit 0=1 -> ICW4 needed; bit 1=0 -> cascade
        out 20h,al      ;  mode -> ICW3 needed; bit 2=1 -> 4 byte vectors;
        mov al,%1       ;  bit 4=1 -> ICW1
        out 21h,al      ; ICW2: bit 3-7 is # which is added to the IRQ # to
        mov al,4        ;  form the int # that will be called
        out 21h,al      ; ICW3: bit 2=1 -> IRQ 2 has a slave
        mov al,1 
        out 21h,al      ; ICW4: bit 0=1 -> 80x86 mode
        mov al,15h      ; If bit 2=0 -> 8 byte vectors (8080/8085)
        out 0a0h,al     ; ICW1 for second PIC
        mov al,%2   
        out 0a1h,al     ; ICW2 for second PIC
        mov al,2 
        out 0a1h,al     ; ICW3: bit 0-2 is the master IRQ which the slave
        mov al,1        ;  (second PIC) is attached to
        out 0a1h,al     ; Initialization Command Word 4 for second PIC
%endmacro

segment stackseg
; stack align=16, specified in RAW32.INC
                resb    RM_STACKSIZE    ; real mode stack

segment code16
; public align=16 use16, specified in RAW32.INC

;16-bit data
                resb    5*IRQ_STACKSIZE
IRQstackRM:                             ; RM IRQ stacks (5 nesting levels)
IRQspRM         dw      $
                dw      code16

idtrRM          dw      3ffh,0,0
errmsg0         db      'General error.$'
errmsg1         db      'Not enough conventional memory (free).$'
errmsg2         db      'Not enough extended memory (free).$'
errmsg3         db      'Could not enable A20 gate.$'
errmsg4         db      'Could not disable A20 gate.$'
errmsg5         db      'DPMI error: $'
errmsg6         db      'VCPI error: $'
errmsg7         db      'XMS error: $'
errmsg8         db      'No way found to enter protected mode.$'

DPMIcall        dd      0
DPMImem_hst     dw      0               ; mem needed by DPMI host (in paras)
DPMImem_hst_seg dw      0               ; segment of mem to provide to host
DPMImeminfo:
  .largest_free dd      0
  .max_unlocked dd      0
  .max_locked   dd      0
  .lin_addr_space dd    0
  .tot_unlocked dd      0
  .tot_free     dd      0
  .tot_physical dd      0
  .free_lin_addr_space dd 0
  .pagingfilesize dd    0
  .reserved     times 12 db 0ffh
DPMIhandle      dd      0               ; handle of allocated DPMI mem block
VCPIpage_base   dd      0
VCPIpage_top    dd      0
XMScall         dd      0
XMShandle       dw      0               ; handle of allocated XMS mem block
SXMSbit         db      0
gate            db      0
enterPM         dw      raw_PM
                dw      raw_PM
                dw      VCPI_PM
                dw      DPMI_PM
orgIVT          resb    16*4            ; original vectors (used for RMCBs)
IRQvector       resb    16*4            ; vectors of orginal RM IRQ handlers

;16-bit code
check_memory:
                                        ; Get conventional memory size
        movzx eax,word [pspbuffer+2 wrt code16] ; ax=top of DOS memory in par
        shl eax,4                       ; eax=amount of conventional memory
        sub eax,[code32a wrt code16] ; eax=mem below 640KB above start code32
        mov [lomemtop wrt code16],eax   ;  =off last byte low mem + 1
        sub eax,[lomembase wrt code16]  ; eax=mem below 640KB above end code32
        shr eax,10                      ; eax=the same, but in KB (=free conv
        cmp ax,LOWMIN                   ;  mem)
        jb near notenoughlowmem
                                        ; Get extended memory size
        clc                             ; Ensure Carry Flag set only on error
        push word codeend
        pop es
        xor di,di                       ; es:di=pointer to buffer for result
        mov edx,'PAMS'                  ; NASM's version of 'SMAP' constant
        xor ebx,ebx                     ; ebx=0 -> Start at begin of map
.fillbuffer:
        mov ecx,20                      ; ecx=num of bytes to copy to buffer
        mov eax,0e820h                  ; eax=e820h -> Get system memory map
        int 15h
        jc short .fn_e801
        add di,cx                       ; cx=number of bytes copied
        or ebx,ebx                      ; ebx=next offset to copy from or 0
        jnz short .fillbuffer           ;  if ready

        mov cx,di
        xor di,di                       ; di -> 1st Phoenix BIOS system mem
        xor eax,eax                     ;  map address range descriptor
.next_descriptor:
        cmp dword [es:di+16],1          ; descriptor+16=type of address range
        jne short .loop                 ;  1 -> Mem available to OS
        mov ebx,[es:di+8]               ; descriptor+8 (qword)=length in bytes
        cmp dword [es:di],100000h       ; descriptor+0 (qword)=base address
        jb short .loop                  ;  extended mem = mem above 1MB ;)
        add eax,ebx
        cmp dword [himembase wrt code16],0
        jne short .loop
        mov ebx,[es:di]                 ; descriptor+0 (qword)=base address       
        mov [himembase wrt code16],ebx
.loop:
        add di,20                       ; 20 = descriptor size in bytes
        cmp di,cx
        jb short .next_descriptor

        shr eax,10                      ; eax=total extended memory in KB
        jmp short .exit

.fn_e801:
        clc                             ; Clear CF of previous failed call
        mov ax,0e801h                   ; ax=e801h -> Get memory size
        int 15h
        jc short .fn_c7
        and edx,0ffffh                  ; edx=configured mem > 16M in 64KB
        shl edx,6                       ; edx=configured mem > 16M in KB
        movzx eax,cx                    ; eax=configured mem 1-16M in KB
        add eax,edx                     ; eax=total extended memory in KB
        jmp short .exit
.fn_c7:
        clc                             ; Clear CF of previous failed call
        mov ah,0c0h                     ; ah=c0h -> Get configuration
        int 15h                         ; es:bx=pointer to ROM table
        jc short .fn_88readCMOS
        test byte [es:bx+6],10h         ; If bit 4 in feature byte 2 is set
        jz short .fn_88readCMOS         ;  -> int 15h/ah=c7h is supported
        push ds
        push word codeend
        pop ds
        xor si,si                       ; ds:si=pointer to buffer
        mov ah,0c7h                     ; ah=c7h -> Return memory map info
        int 15h
        mov eax,[ds:0ah]
        add eax,[ds:0eh]                ; eax=total extended memory in KB
        pop ds
        jmp short .exit

; If int 15h/ah=88h is hooked it may not return the total amount of ext mem
; Function 88h reads from CMOS addresses 30h and 31h (=1 word -> max 64MB)
.fn_88readCMOS:
        mov al,31h              ; Write CMOS addr to read/write to port 70h
        out 70h,al              ; Read/write port 71h to get/set data
        in al,71h                       ; 31h -> MSB of total ext mem in KB
        mov ah,al       
        mov al,30h                      ; 30h -> LSB of total ext mem in KB
        out 70h,al 
        in al,71h       
        and eax,0ffffh                  ; eax=total extended memory in KB
        jnz short .exit
.fn_88:
;       clc                             ; Cleared by AND
        mov ah,88h                      ; ah=88h -> Get extended memory size
        int 15h
        jc short .errexit
        and eax,0ffffh                  ; eax=total extended memory in KB
.exit:
        mov [totalextmem wrt code16],eax
        ret
.errexit:
        mov dx,errmsg0                  ; The total amount of extended memory
        mov byte [returncode wrt code16],0ffh ;  could not be determined
        jmp exit16err

;
notenoughlowmem:
        mov dx,errmsg1
        mov byte [returncode wrt code16],0ffh
        jmp exit16err
notenoughhighmem:
        mov dx,errmsg2
        mov byte [returncode wrt code16],0ffh
        jmp exit16err
notenoughhighmemDPMI:                   ; Entry point if not enough DPMI mem
        mov word [v86r_dx wrt code16],errmsg2 ;  free; this code runs in
        mov byte [returncode wrt code16],0ffh ;  16-bit PM
        jmp exit16errDPMI
;
buildIDT:
        mov ebx,idt
        add ebx,[code32a wrt code16]
        mov [idtr+2 wrt code16],ebx     ; Set IDT base
        mov ecx,ebx
        shr ebx,4                       ; idt is already aligned on para
        mov es,bx                       ; es=segment of IDT

        mov ax,code32dsc-gdt            ; ax=code32sel
        shl eax,16
        mov ax,unexp                    ; eax=LSD of int gate to unexp int
        xor di,di
        mov cx,HIGHEST_INT+1
.nextLSD:                               ; Set least significant dword of all
        mov [es:di],eax                 ;  IDT entries
        add di,8
        loop .nextLSD
        mov eax,8e00h                   ; eax=MSD of int gate (Present, DPL 0)
        mov di,4
        mov cx,HIGHEST_INT+1
.nextMSD:                               ; Set most significant dword of all
        mov [es:di],eax                 ;  IDT entries
        add di,8
        loop .nextMSD

        mov ax,code32
        mov fs,ax                       ; offset table resides in code32
                                     
        mov bx,excoff                   ; Set exception handler offsets
        mov cx,17
        xor di,di
.next_exc:
        mov ax,[fs:bx]
        mov [es:di],ax
        add bx,2
        add di,8
        loop .next_exc

        mov bp,2
        mov bx,IRQoff                   ; Set IRQ handler offsets
        mov di,MASTER_BASE*8
.set8IRQs:
        mov cx,8
.next_IRQ:
        mov ax,[fs:bx]
        mov [es:di],ax
        add bx,2
        add di,8
        loop .next_IRQ

        mov di,SLAVE_BASE*8
        dec bp
        jnz short .set8IRQs

        ret
;
getIRQvectors:                          ; Get vectors of RM IRQ handlers
        push ds                         ;  (necessary for RM_IRQ) and store
        push ds                         ;  them in 'IRQvector'
        pop es
        push word 0
        pop ds                          ; IVT starts at linear address 0
        mov bp,2                        ; 2 PICs -> Get 2 * 8 vectors
        mov di,IRQvector
        mov si,8*4

.get8vectors:
        mov cx,8                        ; 8 IRQs per PIC
.next_vector:
        lodsd                           ; Get vector (segment:offset)
        stosd                           ; Store vector in IRQvector
        loop .next_vector

        mov si,70h*4
        dec bp
        jnz short .get8vectors

        pop ds

        ret
;
init_callbacks:
        call getIRQvectors

        push word 0   
        pop es                          ; es=segment of RM IVT
        mov ax,code32
        mov fs,ax                       ; offset table resides in code32
        mov ax,code16
        shl eax,16
        mov bp,2                        ; 2 PICs -> Set 2 * 8 callbacks
        mov bx,callbackoff
        mov si,orgIVT
        mov di,MASTER_BASE*4            ; offset in IVT of vector for int

.set8callbacks:
        mov cx,8
.next_callback:
        mov edx,[es:di]                 ; Read vector of orginal IRQ handler
        mov [si],edx                    ; Save vector
        mov ax,[fs:bx]                  ; Read offset of new IRQ handler
        stosd                           ; Install new IRQ handler (callback)
        add si,4
        add bx,2
        loop .next_callback

        mov di,SLAVE_BASE*4
        dec bp
        jnz short .set8callbacks

        ret
;
restoreIVT:
        push word 0
        pop es
        mov bp,2
        cld
        mov si,orgIVT
        mov di,MASTER_BASE*4

.set8vectors:
        mov cx,8
.next_vector:
        lodsd
        stosd
        loop .next_vector

        mov di,SLAVE_BASE*4
        dec bp
        jnz short .set8vectors

        ret
;
check_V86:                              ; Check if running in V86 mode
        smsw ax                         ; SMSW is not PL sensitive (CR0 is)
        test al,1                       ; If Protection Enable bit is set ->
        jz short .not_V86               ;  running in protected mode; prog
.V86:                                   ;  runs under DOS -> must be V86 mode
        stc
        ret
.not_V86:
;       clc                             ; Cleared by TEST
        ret
;
check_windows:
        mov ax,4680h                    ; ax=4680h -> Windows 3.0
        int 2fh                         ;  installation check
        cmp ax,0                        ; ax=0 -> Windows 3.0 running
        je short .windows
        mov ax,1600h                    ; al=0 -> Get installed state, ah=16h
        int 2fh                         ;  -> Windows 386
        test al,7fh                     ; al=1/ffh/version number -> Windows
        jnz short .windows              ; al=80h -> XMS 1
        mov ax,3306h                    ; ax=3306 -> Get true version number
        mov bx,0                        ;  (DOS 5+)
        int 21h
        cmp bx,3205h                    ; bx=3205h -> Windows NT
        je short .windows
.no_windows:
        mov byte [windows wrt code16],0
        ret
.windows:
        mov byte [windows wrt code16],1
        ret
;
rawinit:
        call buildIDT                   ; es=segment of IDT
        mov word [es:RMCALL_VECT*8],RMcall_int
%ifdef  SWAT
        call SWATinit
%endif
        cmp dword [himembase wrt code16],0 ; Check if check_memory has
        jne short .himembase_done       ;  already set himembase
        mov dword [himembase wrt code16],100000h ; =address of begin mem block
.himembase_done:
        mov eax,[code32a wrt code16]
        sub [himembase wrt code16],eax  ; =offset start block from code32

        mov eax,[totalextmem wrt code16]
        cmp eax,HIGHMIN
        jb near notenoughhighmem
        shl eax,10                      ; eax=amount of extended mem in bytes
        add eax,[himembase wrt code16]
        mov [himemtop wrt code16],eax   ; =offset of highest byte + 1 in mem
                                        ;  from start code32
        call init_callbacks
        
        mov ax,[code16sel wrt code16]
        mov [exitPM32+4 wrt code16],ax
        mov dword [exitPM32 wrt code16],rawexit

        call enableA20

        ret
;
enableA20:
        mov byte [gate],2               ; 2 -> Enable A20
        call gateA20
        jc short .notenabledA20
        ret
.notenabledA20:
        mov dx,errmsg3
        mov byte [returncode wrt code16],0ffh
        jmp exit16err
;
disableA20:
        mov byte [gate],0               ; 0 -> Disable A20
        call gateA20
        jc short .notdisabledA20
        ret
.notdisabledA20:
        mov dx,errmsg4
        mov byte [returncode wrt code16],0ffh
        jmp exit16err
;
gateA20:
        xor ax,ax                       ; Set fs to 0 and gs to ffffh for
        mov fs,ax                       ;  testA20
        dec ax
        mov gs,ax

        call testA20                    ; Is A20 enabled or disabled?
        call isA20ok                    ; Is that right?
        jnc near .gateA20done           ; If yes, done

        in al,92h                       ; PS/2 A20 gate
        and al,0fdh                     ; Clear bit 1 (=gate A20)
        or al,[gate]                    ; Set A20 bit or leave it 0
        out 92h,al                      ; 2 -> Enable A20; 0 -> Disable A20

        call testA20                    ; Is A20 enabled or disabled?
        call isA20ok                    ; Is that right?
        jnc short .gateA20done          ; If yes, done

                                        ; A20 gate via 8042 (=keyb controller)
        call empty_8042                 ; Ensure 8042 input buf is empty prior
        jnz short .gateA20fail          ;  to writing to the input registers
        mov al,0adh                     ; adh -> Disable keyboard interface
        out 64h,al                      ; 64h = 8042 command register

        call empty_8042
        jnz short .gateA20fail
        mov al,0d0h                     ; d0h -> Read 8042 output port; value
        out 64h,al                      ;  in output port is placed in output
                                        ;  register
        xor cx,cx
.wait8042:
        in al,64h                       ; Read 8042 status register
        test al,1                       ; Output reg (60h) has data for sys?
        loopz .wait8042                 ; If not (yet) -> Loop
        jz short .gateA20fail

        in al,60h                       ; Data in 8042 output reg -> Read it
        push ax                         ; Save contents of 8042 output port

        call empty_8042
        pop ax                          ; If error -> valid return address
        jnz short .gateA20fail          ;  on stack
        push ax
        mov al,0d1h                     ; d1h -> Write to output port of 8042
        out 64h,al                      ;  (through port 60h)
                                  
        call empty_8042
        pop ax
        jnz short .gateA20fail
        and al,0fdh                     ; Clear bit 1 (=gate A20)
        or al,[gate]                    ; Set A20 bit or leave it 0
        out 60h,al                      ; 60h = 8042 output port (now)

        call empty_8042
        jnz short .gateA20fail
        mov al,0aeh                     ; aeh -> Enable keyboard interface
        out 64h,al

                                        ; Wait for 8042 to gate A20
        mov al,36h                      ; Select counter 0; write LSB, MSB;
        out 43h,al                      ;  cntr mode 3; 16-bit binary counter
        mov al,0                        ; Set counter value to 65536; no check
        out 40h,al                      ;  is necessary if wrap takes place ->
        out 40h,al                      ;  16-bit register wrap does the job
                                        ; 65536 is default counter value too
        mov cx,30                       ; Wait maximum 30 ticks:
.anothertick:                           ;  30 * 1/(1193181 ticks/s)  25 s
        mov al,0d2h                     ; Read back command; bit 5=0 -> Latch
        out 43h,al                      ;  counter; bit 4!=0 -> Don't latch
        in al,40h                       ;  status; bit 1=1 -> Select cntr 0;
        mov dl,al                       ;  bit 0 reserved. Writing 0 instead
        in al,40h                       ;  of read back command also works.
        mov dh,al
.nexttick:                              ; Wait at least 1 PIT clock tick
        mov al,0d2h
        out 43h,al
        in al,40h                       ; Get LSB of current tick counter
        mov ah,al
        in al,40h                       ; Get MSB of current tick counter
        xchg ah,al                      ; If ax is greater than dx (instead
        cmp ax,dx                       ;  of the opposite), dx will still
        je short .nexttick              ;  contain the right value because of
        sub dx,ax                       ;  16-bit reg wrap (e.g. 1-ffff=2)
        sub cx,dx                       ; Substract the number of passed
        jnc short .anothertick          ;  clock ticks between 2 port reads

        call testA20                    ; Is A20 enabled or disabled?
        call isA20ok                    ; Is that right?
        jnc short .gateA20done

.gateA20fail:
        stc                             ; Error -> Carry Flag set
.gateA20done:
        ret
;
testA20:
        mov al,[fs:0]                   ; Get byte from 0:0
        mov ah,al                       ; Save byte
        not al                          ; Modify byte
        xchg al,[gs:10h]                ; If A20 is off -> ffffh:10h = 0:0
        cmp ah,[fs:0]                   ; 0:0 has changed only if A20 is off
        mov [gs:10h],al                 ; Restore byte at ffffh:10h
        ret                             ; Zero Flag is set if A20 is enabled
;
isA20ok:                                ; Carry Flag set = A20 is not ok
        pushf                           ; Save result, zero -> A20 is on
        test byte [gate],2              ; Must A20 be enabled or disabled?
        jz short .disableA20            ; If gate != 2 -> must be disabled
        popf
        clc                             ; Ensure Carry Flag set only on error
        jz short .ok                    ; Must be on, it is on -> A20 is ok
        jmp short .notok
.disableA20:
        popf
        clc
        jnz short .ok                   ; Must be off, it is off -> A20 is ok
        jmp short .notok
.notok:
        stc                             ; Set Carry Flag
.ok:
        ret                             ; Leave Carry Flag cleared
;
empty_8042:                             ; Wait for safe to write to 8042
        xor cx,cx                       ;  (= keyboard controller)
.try_8042:
        in al,64h                       ; Read 8042 status register
        test al,2                       ; Input reg (60/64) has data for 8042?
        loopnz .try_8042                ; If yes -> Loop
        ret
;
check_XMS:
        mov ax,4300h                    ; ax=4300h -> Check XMS installation
        int 2fh
        cmp al,80h                      ; XMS available?
        je short .XMS
.no_XMS:
        stc
        ret
.XMS:
;       clc                             ; Cleared by CMP
        ret
;
XMSinit:
        cmp byte [systemtype wrt code16],2; Check if called by VCPIinit. If so
        je short .VCPIinit              ;  -> already build IDT
        call buildIDT                   ; es=segment of IDT
        mov word [es:RMCALL_VECT*8],RMcall_int
%ifdef  SWAT
        call SWATinit
%endif
.VCPIinit:

        mov ax,4310h                    ; ax=4310h -> Get XMS driver entry
        int 2fh                         ;  point
        mov [XMScall],bx                ; bx=entry point offset
        mov [XMScall+2],es              ; es=entry point segment

        mov ah,0                        ; ah=0 -> Get XMS version number
        call far [XMScall]
        cmp ah,3                        ; ah=XMS version number (major)
        jb short .checkmem              ; SXMS (4GB) available from XMS 3
        mov byte [SXMSbit],80h          ; Call with bit 7 set in al -> SXMS

.checkmem:
        mov eax,800h                    ; ah=8 -> Query free extended memory
        or ah,[SXMSbit]                 ;  (Clear high word for if no SXMS)
        call far [XMScall]              ; When high bit of bl is set ->
        test bl,80h                     ;  bl=error code
        mov dx,errmsg7                  ; eax=largest block size available in
        mov byte [returncode wrt code16],1 ;  KB
        jnz near exit16err              ; edx=total extended memory in KB
        mov [XMSmem wrt code16],eax
        cmp eax,HIGHMIN
        jb near notenoughhighmem
;       cmp eax,0                       ; Not necessary; XMS driver returns
;       je near notenoughhighmem        ;  an error when no free XMS mem left
        mov [himemtop wrt code16],eax   ; =now only _size_ of mem block
        shl dword [himemtop wrt code16],10

        mov edx,eax                     ; edx=amount of ext mem wanted, in KB
        mov ah,9                        ; ah=9 -> Allocate extended mem block
        or ah,[SXMSbit]
        call far [XMScall]              ; dx=block handle
        mov [XMShandle],dx
        cmp al,0                        ; Error?
        mov dx,errmsg7
        mov byte [returncode wrt code16],2
        je near exit16err

        mov dx,[XMShandle]              ; dx=block handle
        mov ah,0ch                      ; ah=0ch -> Lock extended mem block
        call far [XMScall]
        push dx                         ; Save dx for when no error
        cmp al,0                        ; Error?
        mov dx,errmsg7
        mov byte [returncode wrt code16],3
        je near exit16err
                                        ; dx:bx=32-bit physical block address
        pop dx                          ;  after dx has been restored
        shrd eax,edx,16                 ; physical block address, high word
        mov ax,bx                       ; physical block address, low word
        sub eax,[code32a wrt code16]
        mov [himembase wrt code16],eax  ; =offset start block from code32
        add [himemtop wrt code16],eax   ; =offset of 1st byte above block
                                        ;  from start code32

        cmp byte [systemtype wrt code16],2
        je short .VCPIinit2
        call init_callbacks
.VCPIinit2:

        mov ah,5                        ; ah=5 -> Local enable A20 line
        call far [XMScall]
        cmp al,0                        ; Error?
        mov dx,errmsg3
        mov byte [returncode wrt code16],0ffh
        je near exit16err

        mov ax,[code16sel wrt code16]
        mov [exitPM32+4 wrt code16],ax
        mov dword [exitPM32 wrt code16],XMSexit

        ret
;
check_VCPI:
        push word 0
        pop es
        cmp dword [es:4*67h],0          ; Check if int 67h handler exists
        je short .no_VCPI
        mov ax,0de00h                   ; al=0 -> VCPI installation check
        int 67h
        cmp ah,0                        ; No error?
        je short .VCPI
.no_VCPI:
        stc
        ret
.VCPI:
;       clc                             ; Cleared by CMP
        ret
;
VCPIinit:
        mov eax,0de0ah                  ; al=0ah -> Get 8259A int mappings
        int 67h
        cmp ah,0                        ; No error?
        mov dx,errmsg6
        mov byte [returncode wrt code16],1
        jne near exit16err
        mov [omPICbase wrt code16],bl
        mov [osPICbase wrt code16],cl

        call buildIDT                   ; es=segment of IDT
        mov word [es:RMCALL_VECT*8],VCPIcall_int

        mov eax,[code32a wrt code16]
        add [VCPIidtr wrt code16],eax
        add [VCPIgdtr wrt code16],eax
                                        ; Set up temporary page directory &
        add eax,[lomembase wrt code16]  ;  temporary page tables
        add eax,0fffh                   ; Align page directory on 4K boundary
        and ax,0f000h
        mov [VCPIdir_base wrt code16],eax
        shr eax,4
        mov es,ax
                                        ; Clear page directory
        xor di,di
        mov cx,4096/4
        xor eax,eax
        rep stosd
                                        ; Get current 1st page table (0-4MB)
        mov di,4096                     ; es:di=ptr to 4KB page table buffer
        mov si,VCPIdsc0 wrt code16      ; ds:si is filled in by server (3 dsc)
        mov ax,0de01h                   ; al=1 -> Get PM interface
        int 67h
        cmp ah,0                        ; No error?
        mov dx,errmsg6                  ; di=first free entry in page table
        mov byte [returncode wrt code16],2
        jne near exit16err
        mov [VCPIcall wrt code16],ebx   ; ebx=offset server's PM entry point
                                        ;  (in VCPIdsc0)
        and edi,0ffffh                  ; High word is undefined -> Clear it
%ifdef  SWAT
        push di                         ; SWATinit destroys di and es, but
        push es                         ;  register's values are needed again
        call SWATinit
        pop es
        pop di
%endif
        mov [VCPIpage_base wrt code16],edi ; Save entry address where own
        mov eax,edi                     ;  made page table entries start
        sub eax,4096                    ; eax=first free entry in page table
        shl eax,10                      ;    =number of used 1KB blocks
        sub eax,[code32a wrt code16]
        mov [himembase wrt code16],eax  ; =offset start block from code32
                                        ;  (linear address)

                                        ; Allocate new pages
        call check_XMS                  ; Check for XMS to do XMS mem alloc
        jnc short .XMSalloc             ;  for XMS mem allocation is _much_
                                        ;  faster than VCPI mem allocation.
                                        ;  XMS can be > 32768, many VCPI's not
                                        ;  -> Avail. mem not limited to VCPI

        mov ax,0de03h                   ; al=3 -> Get number of free 4KB pages
        int 67h
        push dx                         ; Save dx for when no error
        cmp ah,0                        ; No error?
        mov dx,errmsg6
        mov byte [returncode wrt code16],3
        jne near exit16err
        pop dx                          ; Restore dx (edx=number of pages)
        shl edx,2                       ; edx=free VCPI mem in KB
        mov [VCPImem wrt code16],edx
        cmp edx,HIGHMIN+12
        jb near notenoughhighmem

        mov cx,di                       ; cx=first free entry in page table
        add cx,3*4                      ; 3 pages; 1 for page dir, 1 for
.next_page:                             ;  server's page, 1 to support 4GB
        mov ax,0de04h                   ; al=4 -> Allocate a 4KB page
        int 67h
        cmp ah,0                        ; No error?
        jne short .last_page            ; edx=physical page address
        and dx,0f000h                   ; Clear page attributes
        or edx,7                        ; User (=page addressable regardless
        mov [es:di],edx                 ;  the CPL); Read and write; Present
        add di,4
        cmp di,cx
        jb short .next_page
.last_page:                             ; edi=first free entry in page table
        cmp di,cx                       ; Check if the 3 pages are actually
        mov dx,errmsg6                  ;  allocated or a page fault will be
        mov byte [returncode wrt code16],4 ;  generated when in VCPIinitPM
        jb near exit16err
        mov [VCPIpage_top],edi
        jmp short .setup_pagedir

.XMSalloc:
        push dword [himembase wrt code16] ; =linear address of block
        push es                         ; XMSinit destroys es
        call XMSinit
        pop es
        cmp dword [XMSmem wrt code16],HIGHMIN+12
        jb near notenoughhighmem
        mov edx,[himembase wrt code16]
        add edx,[code32a wrt code16]    ; edx=phys addr of start XMS block
        or edx,7                        ; User (=page addressable regardless
        pop dword [himembase wrt code16];  the CPL); Read and write; Present
        mov cx,di                       ; cx=first free entry in page table
        add cx,3*4                      ; 3 pages; 1 for page dir, 1 for
.next_XMSpage:                          ;  server's page, 1 to support 4GB
        mov [es:di],edx
        add edx,4096                    ; Next page resides 4K higher in mem
        add di,4
        cmp di,cx
        jb short .next_XMSpage          ; edi=first free entry in page tables
        mov [VCPIpage_top],edi
                                        ; Set up page directory address (CR3)
.setup_pagedir:
        mov ebx,[VCPIdir_base wrt code16]
        shr ebx,12                      ; number of 4KB page where dir in mem
        mov eax,[es:ebx*4 + 4096]       ; Get page dir address in page table
        and ax,0f000h                   ; 12 lowest bits reserved by Intel
        mov [VCPIcr3 wrt code16],eax
                                        ; Set up page directory
        xor di,di
        mov cx,2                        ; 2 page tbl; 1 from VCPI server + 1
.next_table:                            ;  for 4MB to store page tbls for 4GB
        inc ebx                         ; next page directory entry
        mov eax,[es:ebx*4 + 4096]       ; Get address of next page table
        and ax,0f000h                   ; Clear page attributes
        or eax,7                        ; User (=page addressable regardless
        stosd                           ;  the CPL); Read and write; Present
        loop .next_table

        call init_callbacks

        mov ax,[VCPIsel0 wrt code16]
        mov [VCPIcall+4 wrt code16],ax
        mov eax,[VCPIcr3 wrt code16]
        mov [mainTSS.@cr3 wrt code16],eax

        mov ax,[mainTSSsel wrt code16]
        mov [VCPItask wrt code16],ax

        mov ax,[code16sel wrt code16]
        mov [exitPM32+4 wrt code16],ax
        mov dword [exitPM32 wrt code16],VCPIexit

        ret
;
VCPIinitPM:
                                        ; Copy page dir & 1st 2 page tables
        mov esi,[VCPIdir_base wrt code16] ;  from conv to extended memory
        mov edi,[himembase wrt code16]
        add edi,[code32a wrt code16]
        mov [VCPIdir_base wrt code16],edi

        mov ecx,[VCPIpage_top]          ; ecx=amount mem for page dir/tables
        shr ecx,2
        mov ds,[cs:zerosel wrt code16]
        mov es,[cs:zerosel wrt code16]
        cld
        a32                             ; Address-size prefix -> Move ecx (!)
        rep movsd                       ;  dwords from ds:esi to es:edi

                                        ; Set up page directory address (CR3)
        mov ds,[cs:data16sel wrt code16]
        mov esi,[VCPIdir_base wrt code16]
        mov ebx,esi
        shr ebx,12                      ; number of 4KB page where dir in mem
        mov eax,[es:ebx*4 + esi+4096]   ; Get page dir address in page table
        and ax,0f000h                   ; 12 lowest bits reserved by Intel
        mov [VCPIcr3 wrt code16],eax
        mov [mainTSS.@cr3 wrt code16],eax
        mov cr3,eax                     ; Make page dir & page tables active

        mov edi,esi                     ; Set up page dir's 1st 2 entries
        mov ecx,2                       ;  (make entries point to page tables
.next_table:                            ;  in ext mem instead of conv mem)
        inc ebx                         ; next page directory entry
        mov eax,[es:ebx*4 + esi+4096]   ; Get address of next page table
        and ax,0f000h                   ; Clear page attributes
        or eax,7                        ; User (=page addressable regardless
        a32                             ;  the CPL); Read and write; Present
        stosd                           ; stosd at es:edi (above 1MB ;)
        loop .next_table

        cmp dword [XMSmem wrt code16],0 ; Check if memory has already been
        jne short .XMSpages             ;  allocated through XMS -> Make page
                                        ;  tables for it

        in al,0a1h                      ; Ugly, ugly, ugly...
        shl ax,8
        in al,21h
        mov dx,ax                       ; set8259vectors destroys ax
        set8259vectors MASTER_BASE,SLAVE_BASE
        mov ax,dx
        out 21h,al
        shr ax,8
        out 0a1h,al
                                        ; Allocate memory & create page
        mov edi,[VCPIpage_top]          ;  tables
        add edi,esi                     ; esi=VCPIdir_base
        mov ecx,edi                     ; ecx=first free entry in page table
        add ecx,[VCPImem wrt code16]    ; Amount of mem allocated is diff
.next_page:                             ;  between ecx and edi
        mov ax,0de04h                   ; al=4 -> Allocate a 4KB page
        cli                             ; Must call server with ints disabled
        call dword far [VCPIcall wrt code16] ; If much mem ints must be on
        sti                             ;  or else no keyboard after alloc
        cmp ah,0                        ; No error?
        jne short .last_page            ; edx=physical page address
        and dx,0f000h                   ; Clear page attributes
        or edx,7                        ; User (=page addressable regardless
        mov [es:edi],edx                ;  the CPL); Read and write; Present
        add edi,4
        cmp edi,ecx
        jb short .next_page
.last_page:                             ; edi=first free entry in page tables
        sub edi,esi                     ; esi=VCPIdir_base
        mov [VCPIpage_top],edi

        jmp short setup_pagedir
.XMSpages:                              ; Create page tables for allocated
        mov edi,[VCPIpage_top]          ;  XMS memory
        add edi,esi                     ; esi=VCPIdir_base
        mov ecx,[XMSmem wrt code16]
        sub ecx,3*4                     ; Already made 3 page table entries   
        add ecx,edi                     ; Amount mem that page table entries
                                        ;  are created for is diff ecx and edi
        mov edx,[es:edi-4]              ; edx=phys addr of last allocated page
        add edx,4096                    ; Next page resides 4K higher in mem
        or edx,7                        ; User (=page addressable regardless
.next_XMSpage:                          ;  the CPL); Read and write; Present
        mov [es:edi],edx
        add edx,4096                    ; Next page resides 4K higher in mem
        add edi,4
        cmp edi,ecx
        jb short .next_XMSpage
        sub edi,esi                     ; esi=VCPIdir_base
        mov [VCPIpage_top],edi          ; edi=first free entry in page tables

setup_pagedir:
        mov eax,[himembase wrt code16]
        add eax,edi                     ; Save page directory and page tables
        mov [himembase wrt code16],eax

        sub edi,4096                    ; edi=first free entry in page tables
        shl edi,10                      ;    =number of used 1KB blocks
        sub edi,[code32a wrt code16]    ; =offset of 1st byte above block
        mov [himemtop wrt code16],edi   ;  from start code32

                                        ; Set up page directory
        mov ebx,esi                     ; esi=VCPIdir_base
        shr ebx,12                      ; number of 4KB page where dir in mem
        mov edi,esi
        mov ecx,[VCPIpage_top]          ; Round amount up to 4MB units (-4K
        dec ecx                         ;  for dir's size; +4K-1 to round);
        shr ecx,12                      ;  dir entries are done at 4MB incr
.next_table:                            ; ecx=amount mem in 4MB units
        inc ebx                         ; next page directory entry
        mov eax,[es:ebx*4 + esi+4096]   ; Get address of next page table
        and ax,0f000h                   ; Clear page attributes
        or eax,7                        ; User (=page addressable regardless
        a32                             ;  the CPL); Read and write; Present
        stosd                           ; stosd at es:edi (above 1MB ;)
        loop .next_table

        ret
;
check_DPMI:
        mov ax,1687h                    ; Get RM to PM switch entry point
        int 2fh                         ;  (also check if DPMI present)
        cmp ax,0                        ; No error?
        jne short .no_DPMI
        test bx,1                       ; 32-bit programs are supported?
        mov dx,errmsg5
        mov byte [returncode wrt code16],1
        jz near exit16err               ; DPMI, but not 32-bit
        mov [DPMIcall],di               ; di=entry point offset
        mov [DPMIcall+2],es             ; es=entry point segment
        mov [DPMImem_hst],si            ; mem needed by DPMI host (in paras)
        jmp short .DPMI
.no_DPMI:
        stc
        ret
.DPMI:
;       clc                             ; Cleared by TEST
        ret
;
DPMIinit:
        mov eax,[lomembase wrt code16]
        add eax,[code32a wrt code16]
        add eax,0fh                     ; Align lomembase on paragraph
        and al,0f0h                     ;  boundary; Needed for
                                        ;  DPMImem_hst_seg
        shr eax,4
        mov [DPMImem_hst_seg],ax
        shl eax,4
        sub eax,[code32a wrt code16]    ; eax=low memory base
        movzx ebx,word [DPMImem_hst]
        shl ebx,4                       ; ebx=mem needed by host (in bytes) 
        add eax,ebx
        cmp eax,[lomemtop wrt code16]
        ja near notenoughlowmem
        mov [lomembase wrt code16],eax

        ret
;
DPMIinitPM:                             ; This part can be done in PM only
        mov [code16sel wrt code16],cs   ; Save 16-bit selectors
        mov [data16sel wrt code16],ds
        mov [pspsel wrt code16],es
                                        ; Allocate & initialize descriptors
        xor ax,ax                       ; ax=0 -> Allocate LDT descriptor
        mov cx,4                        ; cx=number of descriptors to allocate
        int 31h
        mov word [v86r_dx wrt code16],errmsg5
        mov byte [returncode wrt code16],3
        jc near exit16errDPMI

        mov [code32sel wrt code16],ax   ; ax=first selector in array of dsc
        mov ax,3                        ; ax=3 -> Get next sel increment value
        int 31h
        mov bx,[code32sel wrt code16]   ; ax=value to add to get to next sel
        add bx,ax                       ; bx=2nd sel
        mov [data32sel wrt code16],bx
        add bx,ax                       ; bx=3rd sel
        mov [zerosel wrt code16],bx
        add bx,ax                       ; bx=4th sel
        mov [envsel wrt code16],bx

        lar cx,[code16sel wrt code16]   ; Load access rights byte in cx
        and cx,6000h                    ; Only the DPL bits are needed
        mov ax,0ch                      ; ax=0ch -> Set descriptor
        mov es,[data16sel wrt code16]   ; es:edi=pointer to 8 byte dsc buffer
        mov edi,code32dsc wrt code16
        or [code32dsc.access wrt code16],ch ; Set the DPL bits to the CPL
        mov bx,[code32sel wrt code16]   ; bx=selector
        int 31h                         ; Set descriptor
        mov byte [returncode wrt code16],4
        jc near exit16errDPMI

        mov edi,data32dsc wrt code16
        or [data32dsc.access wrt code16],ch ; Set the DPL bits to the CPL
        mov bx,[data32sel wrt code16]   ; bx=selector
        int 31h                         ; Set descriptor
        mov byte [returncode wrt code16],5
        jc near exit16errDPMI

        mov edi,zerodsc wrt code16
        or [zerodsc.access wrt code16],ch ; Set the DPL bits to the CPL
        mov bx,[zerosel wrt code16]     ; bx=selector
        int 31h                         ; Set descriptor
        mov byte [returncode wrt code16],6
        jc near exit16errDPMI

        mov edi,envdsc wrt code16
        or [envdsc.access wrt code16],ch ; Set the DPL bits to the CPL
        mov bx,[envsel wrt code16]      ; bx=selector
        int 31h                         ; Set descriptor
        mov byte [returncode wrt code16],7
        jc near exit16errDPMI

        mov ax,500h                     ; ax=500h -> Get free mem information
        mov edi,DPMImeminfo             ; es:edi=sel:offset of 30h buffer
        int 31h
        mov byte [returncode wrt code16],8
        jc near exit16errDPMI
        cmp dword [DPMImeminfo.max_locked],-1
        je short .novirtualmem

        mov ax,604h                     ; ax=604h -> Get page size
        int 31h                         ; bx:cx=page size in bytes
        mov byte [returncode wrt code16],9
        jc near exit16errDPMI
        shl ebx,16
        mov bx,cx                       ; ebx=page size
        mov edx,ebx
        shr edx,10                      ; edx=page size in KB
        push edx                        ; Save edx ("mul ebx" detroys edx)
        mov eax,[DPMImeminfo.max_locked]
        mul ebx                         ; eax=DPMImem in bytes
        shr eax,10                      ; DPMImem in KB
%ifdef  WINFRIENDLY
        cmp byte [windows wrt code16],0 ; Allocating all avail mem under Win
        je short .no_windows            ;  and locking it slows down the
        shr eax,1                       ;  system terribly :(
.no_windows:
%endif
        mov [DPMImem wrt code16],eax
        pop edx
        jmp short .DPMIalloc
.novirtualmem:                          ; Host does not support virtual mem
        mov eax,[DPMImeminfo.largest_free] ;  -> only this field is filled in
        shr eax,10                      ; DPMI mem in KB
        mov [DPMImem wrt code16],eax
        mov edx,4                       ; Assume a default page size of 4KB

.DPMIalloc:
        shl eax,10
        mov cx,ax
        shld ebx,eax,16                 ; bx:cx=size wanted in bytes
        mov ax,501h                     ; ax=501h -> Allocate mem block
        int 31h                         ; bx:cx=linear address of block
        jnc short .mem_allocated        ; Some DPMI hosts (e.g. Windows 95
        sub [DPMImem wrt code16],edx    ;  when virtual mem is turned off)
        mov eax,[DPMImem wrt code16]    ;  return an amount of max lockable
        ja short .DPMIalloc             ;  mem that can't be allocated -> Try
        mov byte [returncode wrt code16],10 ;  to alloc a smaller block until
        jmp exit16errDPMI               ;  no error

.mem_allocated:
        mov [DPMIhandle+2],si           ; si:di=memory block handle
        mov [DPMIhandle],di

.lock_mem:
        mov esi,[DPMImem wrt code16]    ; bx:cx=linear address of mem to lock
        shl esi,10
        mov di,si                       ; si:di=size of block to lock (bytes)
        shr esi,16
        mov ax,600h                     ; ax=600h -> Lock linear region
        int 31h
        jnc short .mem_locked           ; Some DPMI hosts (e.g. CWSDPMI in
        sub [DPMImem wrt code16],edx    ;  certain cases) return an amount of
        ja short .lock_mem              ;  max lockable mem that can't be
        mov byte [returncode wrt code16],11 ;  locked -> Try to lock a
        jmp exit16errDPMI               ;  smaller block until no error

.mem_locked:
        mov eax,[DPMImem wrt code16]
        cmp eax,HIGHMIN
        jb near notenoughhighmemDPMI
;       cmp eax,0                       ; Not necessary; If no mem left to
;       je near notenoughhighmemDPMI    ;  alloc program already exited. The
        shl eax,10                      ;  same applies to locking mem.
        mov [himemtop wrt code16],eax   ; Now only _size_ of block

                                        ; Resize the allocated memory block
        mov cx,ax                       ;  to the size of the locked block.
        shld ebx,eax,16                 ; bx:cx=new size wanted in bytes
        mov si,[DPMIhandle+2]           ; si:di=handle of block to resize
        mov di,[DPMIhandle]
        mov ax,503h                     ; ax=503h -> Resize mem block
        int 31h
        mov byte [returncode wrt code16],12
        jc near exit16errDPMI

        mov [DPMIhandle+2],si           ; si:di=new handle of memory block
        mov [DPMIhandle],di
        shl ebx,16                      ; bx:cx=new linear address of block
        mov bx,cx
        sub ebx,[code32a wrt code16]
        mov [himembase wrt code16],ebx
        add [himemtop wrt code16],ebx

        mov ax,205h                     ; ax=205h -> Set PM interrupt vector
        mov bl,RMCALL_VECT              ; bl=interrupt number
        mov cx,[code32sel wrt code16]   ; cx:edx=addr of exception handler
        mov edx,DPMIcall_int
        int 31h
        mov byte [returncode wrt code16],13
        jc near exit16errDPMI

        mov [exitPM32+4 wrt code16],cs
        mov dword [exitPM32 wrt code16],DPMIexit

        mov ax,[data32sel wrt code16]
        mov ds,ax
        mov es,[pspsel]
        mov fs,ax
        mov gs,[zerosel]

        mov ax,400h                     ; ax=400h -> Get DPMI version info
        int 31h
        mov [mPICbase],dh               ; dh=value of master PIC base int
        mov [sPICbase],dl               ; dl=value of slave PIC base int

        ret
;
%ifdef  SWAT
SWATinit:
        push word 0
        pop es
        cmp dword [es:4*67h],0          ; Check if int 67h handler exists
        je short .exit

        mov ax,0def0h                   ; al=f0h -> Determine dbgr presence
        int 67h
        cmp ah,0                        ; No error?
        jne short .exit

        mov ax,0def2h                   ; al=f2h -> Init debugger interface
        push word code32
        pop es
        mov di,SWATdsc                  ; es:di=addr of GDT entries for dbgr
        mov bx,SWATdsc-gdt              ; bx=first selector for debugger
        int 67h
        cmp ah,0                        ; No error?
        mov dx,errmsg0
        mov byte [returncode wrt code16],254 ; Not quite a "general error"
        jne near exit16err

        mov eax,[idtr+2 wrt code16]
        shr eax,4
        mov es,ax
        xor di,di                       ; es:di=addr IDT entry for exc # bx
        xor bx,bx                       ; Start with exception 0
        mov cx,32                       ; Try to set all exception handlers
.nextexc:
        mov ax,0def3h                   ; al=f3h -> Initialize dbgr IDT entry
        int 67h                         ; no err check; init what can be init
        inc bx                          ; next exception
        add di,8                        ; nxt IDT entry 8 bytes higher in mem
        loop .nextexc

.exit:
        ret
%endif
;
..start:
        mov [cs:psp_seg wrt code16],es
        mov ax,[es:2ch]
        mov [cs:env_seg wrt code16],ax

        mov ax,es                       ; Save Program Segment Prefix
        mov ds,ax
        xor si,si
        mov ax,cs
        mov es,ax
        mov di,pspbuffer wrt code16
        mov cx,256/4
        cld
        rep movsd

        mov eax,code16
        mov ds,ax
        shl eax,4                       ; eax=linear address of code16
        mov [code16a wrt code16],eax
        mov eax,code32                  
        shl eax,4                       ; eax=linear address of code32
        mov [code32a wrt code16],eax
        mov ebx,codeend
        shl ebx,4                       ; ebx=linear address of codeend
        sub ebx,eax                     ; ebx=offset 1st byte above end
        mov [lomembase wrt code16],ebx  ;  code32 from start code32
        call check_memory

        cli
        call check_VCPI                 ; Check VCPI first (Privilege Level
        jc short .no_VCPI               ;  0), DPMI last (nearly always PL 3)
        mov byte [systemtype wrt code16],2
        call VCPIinit
        jmp short .setup_dsc
.no_VCPI:
        call check_V86                  ; Check if the processor is already
        jc short .check_DPMI            ;  in PM -> Must use interface to
        call check_XMS                  ;  enter PM
        jc short .no_XMS
        mov byte [systemtype wrt code16],1
        call XMSinit
        jmp short .setup_dsc
.no_XMS:
        mov byte [systemtype wrt code16],0
        call rawinit
        jmp short .setup_dsc
.check_DPMI:
        mov dx,errmsg8
        mov byte [returncode wrt code16],0ffh          
        call check_DPMI
        jc near exit16err               ; If no DPMI -> No way for this
        mov byte [systemtype wrt code16],3 ;  program to enter PM
        call DPMIinit

.setup_dsc:
        mov eax,[code32a wrt code16]    ; Set up basic descriptors    
        add [gdtr+2 wrt code16],eax
        mov [code32dsc.base0_15 wrt code16],ax
        mov [data32dsc.base0_15 wrt code16],ax
        add [mainTSSdsc.base0_15 wrt code16],ax
        shr eax,8
        mov [code32dsc.base16_23 wrt code16],ah
        mov [data32dsc.base16_23 wrt code16],ah
        add [mainTSSdsc.base16_23 wrt code16],ah
        mov eax,[code16a wrt code16]
        mov [code16dsc.base0_15 wrt code16],ax
        mov [data16dsc.base0_15 wrt code16],ax
        shr eax,8
        mov [code16dsc.base16_23 wrt code16],ah
        mov [data16dsc.base16_23 wrt code16],ah

        movzx eax,word [psp_seg wrt code16] ; Set up PSP descriptor
        shl eax,4
        mov [pspdsc.base0_15 wrt code16],ax
        shr eax,8
        mov [pspdsc.base16_23 wrt code16],ah

        movzx eax,word [env_seg wrt code16] ; Set up environment descriptor
        shl eax,4
        mov [envdsc.base0_15 wrt code16],ax
        shr eax,8
        mov [envdsc.base16_23 wrt code16],ah

        call check_windows              ; present -> windows=1 else windows=0

        mov es,[psp_seg wrt code16]     ; es=segment of the block
        mov ebx,[lomembase wrt code16]  ; ebx=offset 1st byte free low mem
        add ebx,0fh                     ; ebx=idem, round up to paragraphs
        and bl,0f0h                     ; Align lomembase on paragraph
        mov [lomembase wrt code16],ebx  ;  boundary -> MCB at lomembase
        add ebx,[code32a wrt code16]
        shr ebx,4                       ; bx=offset in paragraph units
        sub bx,[psp_seg wrt code16]     ; bx=new block size in paragraphs
        mov ah,4ah                      ; ah=4ah -> Modify allocated mem block
        int 21h
        mov dx,errmsg0                  ; int 21h/ah=4ah places 1 byte + 2
        mov byte [returncode wrt code16],0ffh ;  words at 1st para above
        jc near exit16err               ;  lomembase if not already on para.
                                        ;  These bytes are part of the 16
                                        ;  byte MCB and must be preserved.
        add dword [lomembase wrt code16],16 ; Save MCB

        movzx bx,[systemtype wrt code16]
        shl bx,1                        ; Elements of enterPM are words
        jmp [enterPM+bx]
;
raw_PM:
%ifdef  SWAT
        push word 0
        pop es
        cmp dword [es:4*67h],0          ; Check if int 67h handler exists
        je short .done

        mov ax,0de0bh                   ; al=bh -> Inform debugger about 8259
        mov bx,MASTER_BASE              ;  vector mappings
        mov cx,SLAVE_BASE
        int 67h

.done:
%endif
        lgdt [gdtr wrt code16]          ; Switch to protected mode
        lidt [idtr wrt code16]
        mov eax,cr0
        or al,1                         ; Set Protection Enable bit
        mov cr0,eax
        jmp code32dsc-gdt:start32       ; Exit 16-bit PM, enter 32-bit PM
;
VCPI_PM:
        mov ax,0de0bh                   ; al=bh -> Inform VCPI server about
        mov bx,MASTER_BASE              ;  8259 vector mappings
        mov cx,SLAVE_BASE
        int 67h
        cmp ah,0                        ; No error?
        mov dx,errmsg6
        mov byte [returncode wrt code16],5
        jne near exit16err
        mov ax,[code16sel wrt code16]
        mov [VCPItargetPM+4 wrt code16],ax
        mov dword [VCPItargetPM wrt code16],.inPM
        mov esi,VCPIcr3                 ; Switch to protected mode
        add esi,[code32a wrt code16]    ; esi=system register table address
        mov ax,0de0ch                   ; al=ch -> Switch CPU mode
        int 67h
.inPM:
        mov ax,[cs:data16sel wrt code16]
        mov ds,ax
        mov es,ax
        mov fs,ax
        mov gs,ax
        mov ss,[data32sel wrt code16]
        mov esp,PM_STACKSIZE
        call VCPIinitPM                 ; Make page tables in extended mem to
                                        ;  _really_ support 4GB
%ifdef  SWAT
        cli
        mov word [v86r_ax wrt code16],0def0h ; al=f0h -> Det. dbgr presence
        mov al,67h
        int RMCALL_VECT
        cmp byte [v86r_ah wrt code16],0
        jne short .done

        mov word [v86r_ax wrt code16],0def4h ; al=f4h -> Set dbgr CR3&lin addr
        mov eax,[VCPIcr3 wrt code16]
        mov [v86r_ebx wrt code16],eax   ; ebx=new CR3 (new to debugger)
        mov dword [v86r_edx wrt code16],-1
        mov al,67h                      ; edx=new lin address; -1 = no change
        int RMCALL_VECT
        cmp byte [v86r_ah wrt code16],0 ; No error?
        jne near VCPIexit               ; Can't exit via exit16err; now in PM

.done:
%endif
        jmp code32dsc-gdt:start32       ; Exit 16-bit PM, enter 32-bit PM
;
DPMI_PM:
        mov es,[DPMImem_hst_seg]
        mov ax,1                        ; Indicate 32-bit application
        call far [DPMIcall]             ; Switch to protected mode
        mov dx,errmsg5
        mov byte [returncode wrt code16],2
        jc near exit16err               ; Error, still in real mode
        call DPMIinitPM                 ; Init that can be done in PM only

        mov ax,[data32sel]
        mov ss,ax
        mov esp,PM_STACKSIZE

        mov word [esp-4],DPMIstart32
        mov ax,[code32sel]
        mov [esp-2],ax
        jmp far [esp-4]                 ; Exit 16-bit PM, enter 32-bit PM
;
RM_int:                                 ; Call real mode interrupt handler
        lidt [cs:idtrRM]                ; Switch to real mode
        mov eax,cr0
        and al,0feh                     ; Clear Protection Enable bit
        mov cr0,eax
        jmp code16:$+5                  ; JMP intersegment (sets cs)

        xor esp,esp                     ; Avoid flat RM problems (hidden part
        mov di,RM_int_toPM              ;  of seg regs stays active in RM)
        mov bh,0                        ; necessary for RM_IRQ
        mov bl,[cs:RM_intnum]
        cmp bl,8                        ; int<8?
        jb short .noIRQ
        sub bl,8                        ; necessary for RM_IRQ
        cmp bl,7                        ; int>=8 && int<=15?
        jbe near RM_IRQ
        cmp bl,70h-8                    ; int>15 && int<70h?
        jb short .noIRQ
        sub bl,70h-16                   ; necessary for RM_IRQ
        cmp bl,8+7                      ; int>=70h && int<=77h?
        jbe near RM_IRQ
.noIRQ:
        mov ax,stackseg
        mov ss,ax
        mov sp,RM_STACKSIZE
        mov ax,[cs:v86r_flags wrt code16]
        and ah,~ 2                      ; Keep interrupts disabled
        push ax
        popf
        mov ds,[cs:v86r_ds wrt code16]
        mov es,[cs:v86r_es wrt code16]
        mov fs,[cs:v86r_fs wrt code16]
        mov gs,[cs:v86r_gs wrt code16]
        mov eax,[cs:v86r_eax wrt code16]
        mov ebx,[cs:v86r_ebx wrt code16]
        mov ecx,[cs:v86r_ecx wrt code16]
        mov edx,[cs:v86r_edx wrt code16]
        mov esi,[cs:v86r_esi wrt code16]
        mov edi,[cs:v86r_edi wrt code16]
        mov ebp,[cs:v86r_ebp wrt code16]               
        db 0cdh                         ; INT opcode
RM_intnum       db      0
        mov [cs:v86r_eax wrt code16],eax
        pushf
        pop ax
        and ah,~ 42h                    ; Clear NT flag and IF (Int Enable);
        push ax                         ;  int 21h/ah=4bh sets these flags
        popf
        mov [cs:v86r_flags wrt code16],ax
        mov [cs:v86r_ebx wrt code16],ebx
        mov [cs:v86r_ecx wrt code16],ecx
        mov [cs:v86r_edx wrt code16],edx
        mov [cs:v86r_esi wrt code16],esi
        mov [cs:v86r_edi wrt code16],edi
        mov [cs:v86r_ebp wrt code16],ebp
        mov [cs:v86r_ds wrt code16],ds
        mov [cs:v86r_es wrt code16],es
        mov [cs:v86r_fs wrt code16],fs
        mov [cs:v86r_gs wrt code16],gs

RM_int_toPM:
        lgdt [cs:gdtr wrt code16]       ; Switch to protected mode
        lidt [cs:idtr wrt code16]
        mov eax,cr0
        or al,1                         ; Set Protection Enable bit
        mov cr0,eax
        jmp code32dsc-gdt:retfromRM_int
;
VCPI_int:                               ; Call real mode interrupt handler
        mov di,VCPI_int_toPM
        mov bh,0                        ; necessary for RM_IRQ
        mov bl,[cs:VCPI_intnum]
        cmp bl,8                        ; int<8?
        jb short .noIRQ
        sub bl,8                        ; necessary for RM_IRQ
        cmp bl,7                        ; int>=8 && int<=15?
        jbe near RM_IRQ
        cmp bl,70h-8                    ; int>15 && int<70h?
        jb short .noIRQ
        sub bl,70h-16                   ; necessary for RM_IRQ
        cmp bl,8+7                      ; int>=70h && int<=77h?
        jbe near RM_IRQ
.noIRQ:
        mov ax,[cs:v86r_flags wrt code16] ; VCPI server doesn't do this
        and ah,~ 2                      ; Keep interrupts disabled
        push ax
        popf
        mov eax,[cs:v86r_eax wrt code16]
        mov ebx,[cs:v86r_ebx wrt code16]
        mov ecx,[cs:v86r_ecx wrt code16]
        mov edx,[cs:v86r_edx wrt code16]
        mov esi,[cs:v86r_esi wrt code16]
        mov edi,[cs:v86r_edi wrt code16]
        mov ebp,[cs:v86r_ebp wrt code16]               
        db 0cdh                         ; INT opcode
VCPI_intnum     db      0
        cli
        mov [cs:v86r_eax wrt code16],eax
        mov [cs:v86r_ebx wrt code16],ebx
        mov [cs:v86r_ecx wrt code16],ecx
        mov [cs:v86r_edx wrt code16],edx
        mov [cs:v86r_esi wrt code16],esi
        mov [cs:v86r_edi wrt code16],edi
        mov [cs:v86r_ebp wrt code16],ebp
        mov [cs:v86r_ds wrt code16],ds
        mov [cs:v86r_es wrt code16],es
        mov [cs:v86r_fs wrt code16],fs
        mov [cs:v86r_gs wrt code16],gs
        pushf
        pop ax
        mov [cs:v86r_flags wrt code16],ax

VCPI_int_toPM:
        mov ax,[cs:code32sel wrt code16]
        mov [cs:VCPItargetPM+4 wrt code16],ax
        mov dword [cs:VCPItargetPM wrt code16],retfromVCPI_int
        mov esi,VCPIcr3                 ; Switch to protected mode
        add esi,[cs:code32a wrt code16] ; esi=system register table address
        mov ax,0de0ch                   ; al=ch -> Switch CPU mode
        int 67h                         ; code32sel:retfromVCPI_int
;
RM_IRQ:                                 ; Call real mode IRQ handler
        lss sp,[cs:IRQstackRM]
        sub word [cs:IRQspRM],IRQ_STACKSIZE
        push word 3002h                 ; IRQ handler is CALLed but does IRET
        shl bx,2
        call far [cs:IRQvector+bx]      ; Call original RM handler
        add word [cs:IRQspRM],IRQ_STACKSIZE ; Return to exiting code RM_int /
        jmp di                          ;  VCPI_int when IRQ handler returns
;
RMcallbackIRQ0:                         ; Redirect an IRQ in RM to PM
        pushad
        mov bx,MASTER_BASE
        jmp RMcallbackIRQ
RMcallbackIRQ1:
        pushad
        mov bx,MASTER_BASE+1
        jmp RMcallbackIRQ
RMcallbackIRQ2:
        pushad
        mov bx,MASTER_BASE+2
        jmp RMcallbackIRQ
RMcallbackIRQ3:
        pushad
        mov bx,MASTER_BASE+3
        jmp RMcallbackIRQ
RMcallbackIRQ4:
        pushad
        mov bx,MASTER_BASE+4
        jmp RMcallbackIRQ
RMcallbackIRQ5:
        pushad
        mov bx,MASTER_BASE+5
        jmp RMcallbackIRQ
RMcallbackIRQ6:
        pushad
        mov bx,MASTER_BASE+6
        jmp RMcallbackIRQ
RMcallbackIRQ7:
        pushad
        mov bx,MASTER_BASE+7
        jmp RMcallbackIRQ
RMcallbackIRQ8:
        pushad
        mov bx,SLAVE_BASE
        jmp RMcallbackIRQ
RMcallbackIRQ9:
        pushad
        mov bx,SLAVE_BASE+1
        jmp RMcallbackIRQ
RMcallbackIRQ10:
        pushad
        mov bx,SLAVE_BASE+2
        jmp RMcallbackIRQ
RMcallbackIRQ11:
        pushad
        mov bx,SLAVE_BASE+3
        jmp RMcallbackIRQ
RMcallbackIRQ12:
        pushad
        mov bx,SLAVE_BASE+4
        jmp RMcallbackIRQ
RMcallbackIRQ13:
        pushad
        mov bx,SLAVE_BASE+5
        jmp RMcallbackIRQ
RMcallbackIRQ14:
        pushad
        mov bx,SLAVE_BASE+6
        jmp RMcallbackIRQ
RMcallbackIRQ15:
        pushad
        mov bx,SLAVE_BASE+7
RMcallbackIRQ:
        push ds
        push es
        push fs
        push gs
        mov cx,ss
        mov edx,esp

        push word code32
        pop ds
        shl bx,3                        ; bx=offset of dsc in IDT
        add bx,idt
        mov di,[bx+6]
        shl edi,16
        mov di,[bx]                     ; edi=offset of PM IRQ handler
        mov bx,[bx+2]                   ; bx=selector of PM IRQ handler

        cmp byte [cs:systemtype wrt code16],2
        je short .VCPItoPM
        lgdt [cs:gdtr wrt code16]       ; Switch to protected mode
        lidt [cs:idtr wrt code16]
        mov eax,cr0
        or al,1                         ; Set Protection Enable bit
        mov cr0,eax
        jmp code16dsc-gdt:.inPM

.VCPItoPM:
        mov ax,[cs:code16sel wrt code16]
        mov [cs:VCPItargetPM+4 wrt code16],ax
        mov dword [cs:VCPItargetPM wrt code16],.inPM
        mov esi,VCPIcr3                 ; Switch to protected mode
        add esi,[cs:code32a wrt code16] ; esi=system register table address
        mov ax,0de0ch                   ; al=ch -> Switch CPU mode
        int 67h

.inPM:
        mov ax,[cs:data32sel wrt code16]
        mov ds,ax                       ; seg regs must have a valid PM value
        mov es,ax
        mov fs,ax
        mov gs,ax
        lss esp,[IRQstackPM]
        sub dword [IRQespPM],IRQ_STACKSIZE
        add dword [stackoff],stackPM2-stackPM
        push dword 3002h                ; 32-bit stack -> IRETD pops 12 bytes
        mov [esp-6],edi
        mov [esp-2],bx
        call dword far [esp-6]          ; Call PM IRQ handler

        sub dword [stackoff],stackPM2-stackPM
        add dword [IRQespPM],IRQ_STACKSIZE
        cmp byte [systemtype],2
        je short .VCPItoRM
        lidt [cs:idtrRM]                ; Switch to real mode
        mov eax,cr0
        and al,0feh                     ; Clear Protection Enable bit
        mov cr0,eax
        jmp code16:.inRM

.VCPItoRM:                              ; ss:esp=sel:off system register tbl
        push dword 0                    ; V86 gs
        push dword 0                    ; V86 fs
        push dword code16               ; V86 ds
        push dword code16               ; V86 es
        push ecx                        ; V86 ss
        push edx                        ; V86 esp
        push dword 3002h                ; eflags (dummy, _not_ value in V86)
        push dword code16               ; V86 cs
        push dword .inRM                ; V86 eip
        mov ds,[VCPIsel1]
        mov ax,0de0ch                   ; al=ch -> Switch CPU mode
        call dword far [cs:VCPIcall wrt code16] ; Switch to V86 mode

.inRM:
        mov ss,cx                       ; Restore stack active on entry
        mov esp,edx
        pop gs
        pop fs
        pop es
        pop ds
        popad
        iret
;
rawexit:                            
        lidt [idtrRM]                   ; Switch to real mode
        mov eax,cr0
        and al,0feh                     ; Clear Protection Enable bit
        mov cr0,eax
        jmp code16:$+5                  ; JMP intersegment (sets cs)
        mov ax,cs                       ; back in real mode
        mov ds,ax
        mov es,ax
        mov ax,stackseg
        mov ss,ax
        mov sp,RM_STACKSIZE
        call restoreIVT
%ifdef  SWAT
        push word 0
        pop es
        cmp dword [es:4*67h],0          ; Check if int 67h handler exists
        je short .done

        mov ax,0de0bh                   ; al=bh -> Inform debugger about 8259
        movzx bx,[omPICbase wrt code16] ;  vector mappings
        movzx cx,[osPICbase wrt code16]
        int 67h

.done:
%endif
        call disableA20
        mov al,[returncode wrt code16]
        mov ah,4ch
        int 21h
;
XMSexit:
        lidt [idtrRM]                   ; Switch to real mode
        mov eax,cr0
        and al,0feh                     ; Clear Protection Enable bit
        mov cr0,eax
        jmp code16:$+5                  ; JMP intersegment (sets cs)
        mov ax,cs                       ; back in real mode
        mov ds,ax
        mov es,ax
        mov ax,stackseg
        mov ss,ax
        mov sp,RM_STACKSIZE
XMSdealloc:
        call restoreIVT
%ifdef  SWAT
        cmp byte [systemtype wrt code16],2
        je short .done                  ; VCPI code already informed server
        push word 0
        pop es
        cmp dword [es:4*67h],0          ; Check if int 67h handler exists
        je short .done

        mov ax,0de0bh                   ; al=bh -> Inform debugger about 8259
        movzx bx,[omPICbase wrt code16] ;  vector mappings
        movzx cx,[osPICbase wrt code16]
        int 67h

.done:
%endif
        push word [returncode wrt code16] ; Save returncode for when no error
        mov dx,[XMShandle]              ; dx=block handle
        mov ah,0dh                      ; ah=0dh -> Unlock extended mem block
        call far [XMScall]
        cmp al,0                        ; Error?
        mov dx,errmsg7
        mov byte [returncode wrt code16],4
        je near exit16err

        mov dx,[XMShandle]              ; dx=block handle
        mov ah,0ah                      ; ah=0ah -> Release ext mem block
        call far [XMScall]
        cmp al,0                        ; Error?
        mov dx,errmsg7
        mov byte [returncode wrt code16],5
        je near exit16err

        mov ah,6                        ; ah=6 -> Local disable A20 line
        call far [XMScall]
        cmp al,0                        ; Error?
        mov dx,errmsg4
        mov byte [returncode wrt code16],0ffh
        je near exit16err

        pop ax                          ; al=returncode
        mov ah,4ch
        int 21h
;
VCPIexit:                               ; This code runs in 16-bit PM
        mov es,[zerosel wrt code16]     ; Deallocate pages
        mov edi,[VCPIpage_base]
        mov ecx,[VCPIpage_top]
        sub ecx,edi
        jbe short .dealloc_done
        shr ecx,2                       ; ecx=number of pages to free
        add edi,[VCPIdir_base wrt code16]
.next_page:
        mov edx,[es:edi]                ; edx=physical page address
        and dx,0f000h
        mov ax,0de05h                   ; al=5 -> Free a 4KB page
        call dword far [cs:VCPIcall wrt code16]
        add edi,4
        loop .next_page
.dealloc_done:
                                        ; ss:esp=sel:off system register tbl
        push dword 0                    ; V86 gs
        push dword 0                    ; V86 fs
        push dword code16               ; V86 ds
        push dword code16               ; V86 es
        push dword stackseg             ; V86 ss
        push dword RM_STACKSIZE         ; V86 esp
        push dword 3002h                ; eflags (dummy, _not_ value in V86)
        push dword code16               ; V86 cs
        push dword .inRM                ; V86 eip
        mov ds,[cs:VCPIsel1 wrt code16]
        mov ax,0de0ch                   ; al=ch -> Switch CPU mode
        call dword far [cs:VCPIcall wrt code16] ; Switch to V86 mode
.inRM:                                  ; back in V86 mode
        mov ax,0de0bh                   ; al=bh -> Inform VCPI server about
        movzx bx,[omPICbase wrt code16] ;  8259 vector mappings
        movzx cx,[osPICbase wrt code16]
        int 67h

        call check_XMS                  ; If XMS was present at startup mem
        jnc near XMSdealloc             ;  has been allocated through XMS, so
        call restoreIVT                 ;  deallocation through XMS too
        mov al,[returncode wrt code16]
        mov ah,4ch
        int 21h
;
DPMIexit:                               ; This code runs in 16-bit PM
        mov al,[returncode wrt code16]
        mov ah,4ch
        int 21h

;
exit16err:                              ; Exit program with error message
        mov ah,9                        ; ah=9 -> Print string at ds:dx
        int 21h
        cmp byte [returncode wrt code16],0ffh
        je short .exit                  ; If returncode=ffh -> Don't print it

        mov ah,6                        ; ah=6 -> Direct console I/O
        movzx si,[returncode wrt code16]
        shr si,4                        ; Print most significant nibble 1st
        mov dl,[hextbl+si wrt code16]   ; dl=character to output
        int 21h

        movzx si,[returncode wrt code16]; Print least significant nibble 2nd
        and si,0fh
        mov dl,[hextbl+si wrt code16]
        int 21h

.exit:
        mov ah,0                        ; ah=0 -> Wait for key and read char
        int 16h

        mov al,[returncode wrt code16]
        mov ah,4ch
        int 21h
;
exit16errDPMI:                          ; Exit program with error message
        mov word [v86r_ds wrt code16],code16 ; Real mode ds is needed
        mov byte [v86r_ah wrt code16],9 ; ah=9 -> Print string at ds:dx
        mov word [v86r_ss wrt code16],0 ; Let DPMI host provide a stack
        mov word [v86r_sp wrt code16],0
        mov ax,300h                     ; ax=300h -> Simulate real mode int
        mov cx,ds                
        mov es,cx                       ; cx=number of words to copy from
        xor cx,cx                       ;  protected mode to real mode stack
        mov edi,v86r_edi wrt code16     ; es:edi=address of RM call structure
        mov bh,0                        ; bh=flags
        mov bl,21h                      ; bl=interrupt number
        int 31h
        cmp byte [returncode wrt code16],0ffh
        je short .exit                  ; If returncode=ffh -> Don't print it

        mov byte [v86r_ah wrt code16],6 ; ah=6 -> Direct console I/O
        movzx si,[returncode wrt code16]
        shr si,4                        ; Print most significant nibble 1st
        mov si,[hextbl+si wrt code16]
        mov [v86r_dx wrt code16],si     ; dl=character to output
        int 31h

        movzx si,[returncode wrt code16]; Print least significant nibble 2nd
        and si,0fh
        mov si,[hextbl+si wrt code16]
        mov [v86r_dx wrt code16],si
        int 31h

.exit:
        mov byte [v86r_ah wrt code16],0 ; ah=0 -> Wait for key and read char 
        mov bl,16h
        int 31h

        mov al,[returncode wrt code16]
        mov ah,4ch
        int 21h

segment code32
; public align=16 use32, specified in RAW32.INC

;32-bit data
callbackoff:                            ; temporary init variables
dw      RMcallbackIRQ0, RMcallbackIRQ1, RMcallbackIRQ2, RMcallbackIRQ3
dw      RMcallbackIRQ4, RMcallbackIRQ5, RMcallbackIRQ6, RMcallbackIRQ7
dw      RMcallbackIRQ8, RMcallbackIRQ9, RMcallbackIRQ10, RMcallbackIRQ11
dw      RMcallbackIRQ12, RMcallbackIRQ13, RMcallbackIRQ14, RMcallbackIRQ15
excoff:
dw      exc0, exc1, exc2, exc3, exc4, exc5, exc6, exc7, exc8,
dw      exc9, exc10, exc11, exc12, exc13, exc14, exc15, exc16
IRQoff:
dw      IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7
dw      IRQ8,  IRQ9, IRQ10, IRQ11, IRQ12, IRQ13, IRQ14, IRQ15
%if     ($-callbackoff) > PM_STACKSIZE  ; Stack large enough for temp vars?
%error  "Increase value of PM_STACKSIZE"
%endif
                resb    PM_STACKSIZE-$+callbackoff ; protected mode stack

                resb    5*IRQ_STACKSIZE
IRQstackPM:                             ; PM IRQ stacks (5 nesting levels)
IRQespPM        dd      $
                dw      data32dsc-gdt

                resb    44              ; stack needed under VCPI when prog's
VCPIstack:                              ;  stack resides above 1MB; this to
                dd      $               ;  comply with the specification
                dw      data32dsc-gdt

gdt             seg_descriptor
code32dsc       seg_descriptor  0ffffh, 0, 0, 10011010b, 11001111b, 0
data32dsc       seg_descriptor  0ffffh, 0, 0, 10010010b, 11001111b, 0
zerodsc         seg_descriptor  0ffffh, 0, 0, 10010010b, 11001111b, 0
code16dsc       seg_descriptor  0ffffh, 0, 0, 10011010b, 0,         0
data16dsc       seg_descriptor  0ffffh, 0, 0, 10010010b, 0,         0
pspdsc          seg_descriptor  0ffh,   0, 0, 10010010b, 0,         0
envdsc          seg_descriptor  0ffffh, 0, 0, 10010010b, 0,         0
mainTSSdsc      seg_descriptor  67h, mainTSS, 0, 10001001b, 0, 0
VCPIdsc0        seg_descriptor          ; 68h bytes for task state. Default
VCPIdsc1        seg_descriptor          ;  I/O map base=68h(104d). That's >=
VCPIdsc2        seg_descriptor          ;  TSS limit -> no I/O permission map
freedsc         times N_DSC dd 0,0
%ifdef  SWAT
SWATdsc         times 30 dd 0,0         ; 386SWAT needs 30 dscs (set to 0)
%endif
gdtr            dw      $-gdt-1, gdt, 0

align 4
code32sel       dw      code32dsc  -gdt
data32sel       dw      data32dsc  -gdt
zerosel         dw      zerodsc    -gdt
code16sel       dw      code16dsc  -gdt
data16sel       dw      data16dsc  -gdt
pspsel          dw      pspdsc     -gdt
envsel          dw      envdsc     -gdt
mainTSSsel      dw      mainTSSdsc -gdt
VCPIsel0        dw      VCPIdsc0   -gdt
VCPIsel1        dw      VCPIdsc1   -gdt
VCPIsel2        dw      VCPIdsc2   -gdt
freesel         dw      freedsc    -gdt ; selector base of allocation

align 16
idt             resb    (HIGHEST_INT+1)*8
idtr            dw      (HIGHEST_INT+1)*8-1,0,0
mainTSS         TSS     0, PM_STACKSIZE, data32dsc-gdt
                                        ; No I/O permission map, see above
omPICbase       db      8               ; original master PIC base interrupt
osPICbase       db      70h             ; original slave PIC base interrupt

align 4
mPICbase:                               ; master PIC base interrupt
IRQ0_vect       db      MASTER_BASE
sPICbase:                               ; slave PIC base interrupt
IRQ8_vect       db      SLAVE_BASE

v86r_edi:
v86r_di         dw      0
                dw      0
v86r_esi:
v86r_si         dw      0
                dw      0
v86r_ebp:
v86r_bp         dw      0
                dw      0
                dd      0               ; Reserved under DPMI
v86r_ebx:
v86r_bx:
v86r_bl         db      0
v86r_bh         db      0
                dw      0
v86r_edx:
v86r_dx:
v86r_dl         db      0
v86r_dh         db      0
                dw      0
v86r_ecx:
v86r_cx:
v86r_cl         db      0
v86r_ch         db      0
                dw      0
v86r_eax:
v86r_ax:
v86r_al         db      0
v86r_ah         db      0
                dw      0
v86r_flags      dw      0
v86r_es         dw      0
v86r_ds         dw      0
v86r_fs         dw      0
v86r_gs         dw      0
v86r_ip         dw      0               ; Reserved under DPMI, but ignored
v86r_cs         dw      0               ; Reserved under DPMI, but ignored
v86r_sp         dw      0
v86r_ss         dw      0

DPMImem         dd      0               ; mem available through DPMI in KB

; system register table start 
align 4
VCPIcr3         dd      0               ; physical address of page directory
VCPIgdtr        dd      gdtr
VCPIidtr        dd      idtr
VCPIldt         dw      0
VCPItask        dw      0
VCPItargetPM    dd      0               ; 32-bit offset
                dw      0               ; selector
; system register table end 

VCPIdir_base    dd      0               ; linear address of page directory
VCPIcall        dd      0               ; 32-bit offset
                dw      0               ; selector
VCPImem         dd      0               ; mem available through VCPI in KB
XMSmem          dd      0               ; mem available through XMS in KB
systemtype      db      0               ; 0=raw, 1=XMS, 2=VCPI, 3=DPMI

pspbuffer       times 256 db 0
psp_seg         dw      0
env_seg         dw      0
paramblock      dw      0, cmdtail, code32, 5ch, 0, 6ch, 0
cmdtail         db      0               ; command tail counter
                db      '/c '           ; COMMAND.COM: execute command and
                presettail equ ($-cmdtail)-1 ; ret (no space before /c needed)
                times 127-presettail db 0
comspec         db      'COMSPEC='

align 4
exitPM32        dd      0               ; 32-bit offset
                dw      0               ; selector
code16a         dd      0               ; linear address of code16
code32a         dd      0               ; linear address of code32
lomembase       dd      0               ; low memory base of allocation
lomemtop        dd      0               ; top of low memory
himembase       dd      0               ; high memory base of allocation
himemtop        dd      0               ; top of high memory
stackPM:
espPM           dd      0               ; temporary variables to save stack
ssPM            dw      0               ;  registers (when calling a RM int)
stackPM2:                               ; necessary if RMCB AND PM IRQ
                dw      0,0,0           ;  handler calls RM handler
                dw      0,0,0           ; necessary if nested RMCB
                dw      0,0,0           ; RMCB nesting level 2
                dw      0,0,0           ; RMCB nesting level 3 (yes, found
stackoff        dd      0               ;  under extreme circumstances)
mPICmask        db      0               ; original master PIC mask
sPICmask        db      0               ; original slave PIC mask
totalextmem     dd      0               ; total extended memory present
hextbl          db      '0123456789ABCDEF'
windows         db      0               ; 0=Windows not active; 1=Windows
returncode      db      0               ; DOS return code

;32-bit code
RMcall_int:                             ; al=RM interrupt number to call
        pushad
        push ds
        push es
        push fs
        push gs
        mov es,[cs:data16sel]           ; Use es, by default < 64KB (pspsel)
        mov [es:RM_intnum],al
        mov ebx,[cs:stackoff]           ; necessary if RMCB is being handled
        mov [es:ssPM+ebx wrt code16],ss ; Save protected mode ss:esp
        mov [es:espPM+ebx wrt code16],esp

        jmp code16dsc-gdt:RM_int
retfromRM_int:                            
        mov ebx,[cs:stackoff]
        lss esp,[cs:stackPM+ebx]        ; Restore protected mode ss:esp
        pop gs
        pop fs
        pop es
        pop ds
        popad
        and byte [esp+9],~ 40h          ; Some DOS/BIOS functions set NT flag
        iretd                           ;  (only necessary if RMCBs active)
;
VCPIcall_int:                           ; al=RM interrupt number to call
        pushad
        push ds
        push es
        push fs
        push gs
        mov ds,[cs:data16sel]
        mov [VCPI_intnum],al
        mov ebx,[stackoff wrt code16]   ; necessary if RMCB is being handled
        mov [ssPM+ebx wrt code16],ss    ; Save protected mode ss:esp
        mov [espPM+ebx wrt code16],esp
        lss esp,[VCPIstack wrt code16]  ; necessary when program's stack
                                        ;  resides above 1MB
                                        ; ss:esp=sel:off system register tbl
        push dword [v86r_gs wrt code16] ; V86 gs
        push dword [v86r_fs wrt code16] ; V86 fs
        push dword [v86r_ds wrt code16] ; V86 ds
        push dword [v86r_es wrt code16] ; V86 es
        push dword stackseg             ; V86 ss
        push dword RM_STACKSIZE         ; V86 esp
        push dword 3002h                ; eflags (dummy, _not_ value in V86)
        push dword code16               ; V86 cs
        push dword VCPI_int             ; V86 eip
        mov ds,[VCPIsel1 wrt code16]
        mov ax,0de0ch                   ; al=ch -> Switch CPU mode
        call far [cs:VCPIcall]          ; Switch to V86 mode
retfromVCPI_int:
        mov ebx,[cs:stackoff]
        lss esp,[cs:stackPM+ebx]        ; Restore protected mode ss:esp
        pop gs
        pop fs
        pop es
        pop ds
        popad
        iretd
;
DPMIcall_int:                           ; al=RM interrupt number to call
        push eax
        push ebx
        push ecx
        push edi
        push es
        mov es,[cs:data32sel]
        mov word [es:v86r_ss],stackseg
        mov word [es:v86r_sp],RM_STACKSIZE
        mov bh,0                        ; bh=flags
        mov bl,al                       ; bl=interrupt number
        mov ax,300h                     ; ax=300h -> Simulate real mode int
        xor cx,cx                       ; cx=number of words to copy from
        mov edi,v86r_edi                ;  protected mode to real mode stack
        int 31h                         ; es:edi=address of RM call structure
        pop es
        pop edi
        pop ecx
        pop ebx
        pop eax
        iretd
;Exceptions
exc0:                                   ; divide error
        push eax
        mov al,0
        jmp IRQ                         ; Use real mode handler
exc1:                                   ; debug exception
        push eax
        mov al,1
        jmp IRQ                         ; Use real mode handler
exc2:                                   ; nonmaskable interrupt
        push eax
        mov al,2
        jmp IRQ                         ; Use real mode handler
exc3:                                   ; breakpoint
        push eax
        mov al,3
        jmp IRQ                         ; Use real mode handler
exc4:                                   ; overflow
        push eax
        mov al,4
        jmp IRQ                         ; Use real mode handler
exc5:                                   ; bounds check
        push eax
        mov al,5
        jmp IRQ                         ; Use real mode handler
exc6:                                   ; invalid opcode
        pushad
        mov dl,6
        jmp exc
exc7:                                   ; coprocessor not available
        push eax
        mov al,7
        jmp IRQ                         ; Use real mode handler
exc8:                                   ; double fault
        pushad
        mov dl,8
        jmp exc
exc9:                                   ; coprocessor segment overrun
        pushad
        mov dl,9
        jmp exc
exc10:                                  ; invalid TSS
        pushad
        mov dl,10
        jmp exc
exc11:                                  ; segment not present
        pushad
        mov dl,11
        jmp exc
exc12:                                  ; stack exception
        pushad
        mov dl,12
        jmp exc
exc13:                                  ; general protection
        pushad
        mov dl,13
        jmp exc
exc14:                                  ; page fault
        sub esp,4
        push eax
        mov eax,cr2
        mov [esp+4],eax                 ; CR2 contains the linear address
        pop eax                         ;  that caused the exception
        pushad
        mov dl,14
        jmp exc
exc16:                                  ; coprocessor error
        pushad
        mov dl,16
        jmp exc
exc15:
unexp:                                  ; unexpected interrupt
        pushad
        mov dl,0ffh
exc:
        mov ax,[cs:data32sel]
        mov ds,ax
        mov es,ax
        mov fs,ax
        mov ax,[zerosel]
        mov gs,ax
        mov al,0ffh
        out 21h,al                      ; Mask off all low IRQs
        out 0a1h,al                     ; Mask off all high IRQs
        mov word [v86r_ax],3            ; Set video mode 3 (text 80x25 color)
        mov al,10h
        int RMCALL_VECT
        mov edi,0b8000h+2*160           ; Start printing on 3rd row
        sub edi,[code32a]
        mov ebx,hextbl
        mov ecx,2                       ; Print 2 characters
        shl edx,24                      ; exc number in bits 24-31
        mov ah,0fh                      ; bright white
        cld
        call putnum                     ; Print exception number
        add edi,152                     ; Goto next row
        mov ah,9                        ; bright blue
        mov ebp,8                       ; Print edi,esi,ebp,esp,ebx,edx,ecx,
putreg:                                 ;  eax = 8 registers
        pop edx                         ; Get register content from stack
        mov cl,8                        ; Print 8 nibbles = 32 bits
        call putnum
        dec ebp
        jnz putreg

        mov ebp,esp                     ; Do a stack dump
        mov esi,esp
        add esi,16*8*4                  ; Print 16 rows of 8 dwords=512 bytes
        mov ah,7                        ; white
putstack:
        mov edx,[ebp]                   ; Get dword from stack
        add ebp,4
        mov cl,8                        ; Print 8 nibbles = 32 bits
        call putnum
        cmp ebp,esi                     ; Last dword on stack printed?
        jb putstack                     ; esp < ebp -> Print another dword
        jmp exit
;
putnum:
        rol edx,4
        mov al,dl
        and al,0fh
        xlatb                           ; mov al,[ds:ebx+al]
        stosw                           ; mov [es:edi],ax
        loop putnum
        mov al,' '
        stosw                           ; Print 2 spaces
        stosw
        ret
;IRQs
IRQ0:
        push eax
        mov al,8
        jmp short IRQ
IRQ1:
        push eax
        mov al,9
        jmp short IRQ
IRQ2:
        push eax
        mov al,0ah
        jmp short IRQ
IRQ3:
        push eax
        mov al,0bh
        jmp short IRQ
IRQ4:
        push eax
        mov al,0ch
        jmp short IRQ
IRQ5:
        push eax
        mov al,0dh
        jmp short IRQ
IRQ6:
        push eax
        mov al,0eh
        jmp short IRQ
IRQ7:
        push eax
        mov al,15
        jmp short IRQ
IRQ8:
        push eax
        mov al,70h
        jmp short IRQ
IRQ9:
        push eax
        mov al,71h
        jmp short IRQ
IRQ10:
        push eax
        mov al,72h
        jmp short IRQ
IRQ11:
        push eax
        mov al,73h
        jmp short IRQ
IRQ12:
        push eax
        mov al,74h
        jmp short IRQ
IRQ13:
        push eax
        mov al,75h
        jmp short IRQ
IRQ14:
        push eax
        mov al,76h
        jmp short IRQ
IRQ15:
        push eax
        mov al,77h
IRQ:                                    ; An IRQ in protected mode -> Reflect
        int RMCALL_VECT                 ;  the IRQ to real/V86 mode
        pop eax
        iretd
;
start32:
        mov ax,[cs:data32sel]
        mov ds,ax
        mov es,[pspsel]
        mov fs,ax
        mov gs,[zerosel]
        mov ss,ax
        mov esp,PM_STACKSIZE

        and byte [mainTSSdsc.access],~ 2; Clear Busy bit (only needed under
        ltr [mainTSSsel]                ;  VCPI; TR already loaded by server)

        pushfd
        pop eax
        or ah,30h                       ; bit 12-13 -> IOPL; 3=high, 0=low PL
        and ah,~ 40h                    ; Clear Nested Task Flag
        push eax
        popfd
        in al,21h                       ; Read Interrupt Mask Register of
        mov [mPICmask],al               ;  master PIC and save it
        in al,0a1h                      ; Read Interrupt Mask Register of
        mov [sPICmask],al               ;  slave PIC and save it
        set8259vectors MASTER_BASE,SLAVE_BASE
        mov al,[mPICmask]               ; Programming the PICs resets masks
        and al,0e4h                     ; Enable COM1+COM2 (mouse), keyboard,
        out 21h,al                      ;  timer
        mov al,[sPICmask]
        out 0a1h,al

DPMIstart32:                            ; DPMI entry point
        in al,21h                       ; ugly for non-DPMI...
        mov [mPICmask],al
        and al,0e4h
        out 21h,al
        in al,0a1h
        mov [sPICmask],al

        mov esi,pspbuffer               ; Restore Program Segment Prefix
        xor edi,edi
        mov ecx,256/4
        rep movsd

        mov byte [returncode],0         ; default return code of 0
        sti
        jmp main

;
exit:
        cli
        mov ebx,cs                      ; Check if running at PL 0; CLTS is
        lar ecx,ebx                     ;  a privileged instruction
        and ecx,6000h
        jnz short .done

        mov dl,[cs:omPICbase]
        mov dh,[cs:osPICbase]           ; 1st PIC IRQ 0->int omPICbase, ...
        set8259vectors dl,dh            ;  2nd PIC IRQ 0(8)->int osPICbase
        mov al,[cs:mPICmask]            ; Restore original values in Interrupt
        out 21h,al                      ;  Mask Register of master PIC
        mov al,[cs:sPICmask]            ; Restore original values in Interrupt
        out 0a1h,al                     ;  Mask Register of slave PIC

        clts                            ; Clear Task Switched bit (necessary
                                        ;  for some programs in order to run 
.done:                                  ;  properly under DOS after this prog)
        mov ax,[cs:data16sel]
        mov ds,ax
        mov es,ax
        mov fs,ax
        mov gs,ax
        mov ss,ax
        mov esp,IRQ_STACKSIZE           ; ax:RM_STACKSIZE doesn't make sense
        jmp far [cs:exitPM32]           ; Exit 32-bit PM, enter 16-bit PM


;
; Allocate any memory (first checks low, then high)
; In:
;   ds = data32sel
;   eax = size requested in bytes
; Out:
;   CF=0 -> Memory allocated
;     eax = linear pointer to memory
;     lomembase = lomembase + eax / himembase = himembase + eax
;   CF=1 -> Not enough memory
;     eax = ?
;
getmem:
        push eax
        call getlomem
        jnc short .exit
        pop eax
        jmp short gethimem
.exit:
        add esp,4
        ret

;
; Allocate low memory
; In:
;   ds = data32sel
;   eax = size requested in bytes
; Out:
;   CF=0 -> Memory allocated
;     eax = linear pointer to memory
;     lomembase = lomembase + eax
;   CF=1 -> Not enough memory
;     eax = ?
;
getlomem:
        add eax,[lomembase]
        cmp eax,[lomemtop]
        ja short .errexit
        xchg eax,[lomembase]
        clc
        ret
.errexit:
        stc
        ret

;
; Allocate high memory
; In:
;   ds = data32sel
;   eax = size requested in bytes
; Out:
;   CF=0 -> Memory allocated
;     eax = linear pointer to memory
;     himembase = himembase + eax
;   CF=1 -> Not enough memory
;     eax = ?
;
gethimem:
        add eax,[himembase]
        cmp eax,[himemtop]
        ja short .errexit
        xchg eax,[himembase]
        clc
        ret
.errexit:
        stc
        ret

;
; Get amount of free low memory
; Out:
;   eax = number of bytes free
;
lomemsize:
        mov eax,[cs:lomemtop]
        sub eax,[cs:lomembase]
        ret

;
; Get amount of free high memory
; Out:
;   eax = number of bytes free
;
himemsize:
        mov eax,[cs:himemtop]
        sub eax,[cs:himembase]
        ret

;
; Allocate descriptor
; In:
;   ds = data32sel
;   cx = number of descriptors to allocate
; Out:
;   CF=0 -> Descriptor successfully allocated
;     ax = first selector in array of descriptors
;   CF=1 -> Not enough free descriptors left
;     ax = ?
;
getdsc:
        push ecx
        cmp byte [systemtype],3         ; Under DPMI the GDT is not part of
        je short .DPMIgetdsc            ;  this program in memory

        shl cx,3                        ; A descriptor is 8 bytes in size
        add cx,[freesel]
        mov ax,[gdtr]                   ; gdtr[0] contains the GDT limit
        inc ax
        cmp cx,ax                       ; Enough descriptors free?
        ja short .errexit
        mov ax,[freesel]
        mov [freesel],cx
        clc                             ; CF set only on error
        jmp short .exit

.DPMIgetdsc:
        xor ax,ax                       ; ax=0 -> Allocate LDT descriptor
        int 31h
        jnc short .exit                 ; ax=first selector in array of dsc

.errexit:
        stc
.exit:
        pop ecx
        ret

;
; Set descriptor
; In:
;   ds = data32sel
;   es:edi = pointer to 8 byte descriptor buffer
;   bx = selector
; Out:
;   CF=0 -> Descriptor succesfully set
;   CF=1 -> Descriptor does not reside in the GDT/LDT (non DPMI)
;
setdsc:
        push eax
        push ebx
        cmp byte [systemtype],3         ; Under DPMI the GDT/LDT is not part
        je short .DPMIsetdsc            ;  of this program in memory

        test ebx,4                      ; Check TI bit
        jnz short .LDTdsc               ; If TI bit is set -> LDT descriptor
        and ebx,0fffch                  ; Clear RPL bits -> right offset
        movzx eax,word [gdtr]           ; gdtr[0] contains the GDT limit
        sub eax,7                       ; eax=highest valid GDT entry
        cmp ebx,eax                     ; Check if dsc resides in the GDT
        ja short .errexit
        mov eax,[es:edi]
        mov [gdt+ebx],eax
        mov eax,[es:edi+4]
        mov [gdt+ebx+4],eax
        clc                             ; CF set only on error
        jmp short .exit

.LDTdsc:
        and ebx,0fff8h                  ; Clear TI+RPL bits -> right offset
        sldt ax
        and eax,0ffffh                  ; Enable use of _e_ax
        cmp bx,[gdt.limit0_15+eax]      ; Check if dsc resides in the LDT
        ja short .errexit
        push edx
        mov dh,[gdt.base24_31+eax]
        mov dl,[gdt.base16_23+eax]
        shl edx,16
        mov dx,[gdt.base0_15+eax]       ; edx=linear address of LDT base
        add ebx,edx                     ; ebx=linear address of dsc in LDT
        pop edx
        sub ebx,[code32a]
        mov eax,[es:edi]
        mov [ebx],eax
        mov eax,[es:edi+4]
        mov [ebx+4],eax
        clc                             ; CF set only on error
        jmp short .exit

.DPMIsetdsc:
        mov ax,0ch                      ; ax=0ch -> Set descriptor
        int 31h
        jnc short .exit

.errexit:
        stc
.exit:
        pop ebx
        pop eax
        ret

;
; Get interrupt vector
; In:
;   bl = interrupt number
; Out:
;   cx:edx = selector:offset of handler
;
getvect:
        cmp byte [cs:systemtype],3      ; Under DPMI the IDT is not part of
        je short .DPMIgetvect           ;  this program in memory

        push ebx
        and ebx,0ffh    
        shl ebx,3                       ; ebx=offset of int gate dsc in IDT
        mov cx,[cs:idt+ebx+2]           ; int gate[2]=selector
        mov dx,[cs:idt+ebx+6]           ; int gate[6]=MSW of offset
        shl edx,16
        mov dx,[cs:idt+ebx]             ; int gate[0]=LSW of offset
        pop ebx
        jmp short .exit

.DPMIgetvect:
        push eax
        mov ax,204h                     ; ax=204h -> Get PM interrupt vector
        int 31h
        pop eax

.exit:
        ret

;
; Set interrupt vector
; In:
;   ds = data32sel
;   bl = interrupt number
;   cx:edx = selector:offset of handler
; Out:
;   CF=0 -> Interrupt vector successfully set
;   CF=1 -> Interrupt number is not present in the IDT (int number too high)
;
setvect:
        pushfd
        cli
        cmp byte [systemtype],3         ; Under DPMI the IDT is not part of
        je short .DPMIsetvect           ;  this program in memory

        push eax                        ; Check if int is in the IDT
        mov ax,[idtr]                   ; idtr[0] contains the IDT limit
        shr ax,3                        ; An int gate is 8 bytes in size
        cmp bl,al                       ; ax < 256, because highest int < 256
        pop eax                         ; Highest int that is allowed is
        ja short .errexit               ;  IDT limit/8, not IDT size/8

        push ebx
        and ebx,0ffh
        push edx
        shl ebx,3                       ; ebx=offset of int gate dsc in IDT
        mov [idt+ebx+2],cx              ; int gate[2]=selector
        mov [idt+ebx],dx                ; int gate[0]=LSW of offset
        shr edx,16
        mov [idt+ebx+6],dx              ; int gate[6]=MSW of offset
        pop edx
        pop ebx
        jmp short .exit

.DPMIsetvect:
        push eax
        mov ax,205h                     ; ax=205h -> Set PM interrupt vector
        int 31h
        pop eax
        jnc short .exit

.errexit:
        popfd
        stc
        ret
.exit:
        popfd
        clc
        ret

;
; Map a region of physical addresses to linear addresses
; In:
;   ds = data32sel
;   eax = size of region to map in bytes
;   edx = physical address of memory
; Out:
;   CF=0 -> Memory has been successfully mapped
;     edx = linear address that corresponds with physical memory
;   CF=1 -> Not enough memory to create page tables / Page directory is full
;     edx = ?
;
map_phys_addr:
        push eax
        push ebx
        push ecx
        push esi
        push edi
        push es
        cmp byte [systemtype],3         ; Under DPMI the page tables are not
        je near .DPMImap                ;  part of this program in memory
        cmp byte [systemtype],2
        je short .VCPImap

        clc                             ; Under raw and XMS paging is not
        jmp .exit                       ;  enabled -> lin addr=phys addr

.VCPImap:
        add eax,0fffh                   ; Round region up to 4K units
        and eax,0fffff000h
        shr eax,10                      ; eax=amount mem needed to map mem;
        mov ecx,eax                     ;  for every KB 1 byte in page table

        mov ebx,[himembase]
        mov esi,ebx
        add esi,0fffh                   ; Check free memory base alignment
        and esi,0fffff000h
        sub esi,ebx                     ; esi=offset to get at 4K alignment
        add eax,esi                     ;  from current free high mem base
                                        ; Extra mem in eax for aligning ->
                                        ;  it is allowed to do aligning
        call gethimem
        jc near .errexit
        add eax,[code32a]               ; eax=linear address of 1st page
        add eax,0fffh                   ;  table entry
        and eax,0fffff000h              ; Align address on 4KB byte boundary
        mov edi,eax                     ; edi=lin address of 1st table entry
        push ecx                        ; Save ecx and edi for making page
        push edi                        ;  table entries

        mov es,[zerosel]
        mov edi,[VCPIdir_base]
        mov ecx,4096/4
        xor eax,eax                     ; Search for first 0 (=free) entry
        cld
        repne scasd                     ; Compare eax with es:edi while not 0
        jne near .errexit               ; ZF=0 (ecx=0) -> No page dir entries
                                        ;  free
        sub edi,4                       ; edi=lin address of 1st free entry
        sub edi,[VCPIdir_base]
        shl edi,20                      ; edi=lin address of mapped phys mem
        mov eax,edi

        pop edi                         ; Values of ecx and edi are needed
        pop ecx                         ;  again
        push ecx
        push edi
        and edx,0fffff000h              ; Clear page attributes
        or edx,7                        ; User (=page addressable regardless
        add ecx,edi                     ;  the CPL); Read and write; Present
.next_page:                             ; Amount mem that page tables are 
        mov [es:edi],edx                ;  created for is diff ecx and edi
        add edx,4096                    ; Next page resides 4K higher in mem
        add edi,4
        cmp edi,ecx
        jb short .next_page
        mov edx,eax                     ; edx=lin address of mapped phys mem

        pop ebx                         ; ebx=lin address of 1st table entry
        shr ebx,12                      ; ebx=number of 4KB page where in mem
        mov esi,[VCPIdir_base]
        mov edi,edx                     ; edi=lin address of mapped phys mem
        shr edi,20                      ; edi=offset of entry of 1st page
        add edi,[VCPIdir_base]          ;  table in page dir (=1MB units mem)
        pop ecx                         ; ecx=size of region to map in KB
        add ecx,0fffh                   ; Round up to 4MB units
        shr ecx,12                      ; ecx=number of 4MB regions to map
.next_table:
        mov eax,[es:ebx*4 + esi+4096]
        and eax,0fffff000h              ; Clear page attributes
        or eax,7                        ; User (=page addressable regardless
        stosd                           ;  the CPL); Read and write; Present
        inc ebx                         ; For each 4MB 4 bytes in directory
        loop .next_table                ;  -> for each MB 1 byte

;       clc                             ; Cleared by OR
        jmp short .exit

.DPMImap:
        mov cx,dx
        shld ebx,edx,16                 ; bx:cx=physical address of memory
        mov di,ax
        shld esi,eax,16                 ; si:di=size of region to map (bytes)
        mov ax,800h                     ; ax=800h -> Map physical address to
        int 31h                         ;  linear address
        jc short .errexit
        shrd edx,ebx,16                 ; bx:cx=lin addr to access phys mem
        mov dx,cx
        clc                             ; CF set only on error
        jmp short .exit

.errexit:
        stc
.exit:
        pop es
        pop edi
        pop esi
        pop ecx
        pop ebx
        pop eax
        ret

;
; Shell to DOS
; In:
;   ds = data32sel
;   esi = offset of 0 terminated string (program to start via COMMAND.COM)
;   esi = 0 -> Only COMMAND.COM will be started
; Out:
;   CF=0 -> No error
;     lomembase = lomembase + 16 and aligned on paragraph boundary
;   CF=1 -> Error
;
; Note:
;   int 21h/ah=4ah places 1 byte + 2 words at first paragraph above lomembase
;   if lomembase is not already on paragraph boundary. These bytes are part
;   of the 16 byte Memory Control Block and must not be overwritten. dosshell
;   saves this MCB ("add lomembase,16").
;
; Offsets in Program Segment Prefix:
; 5Ch -> default unopened FCB #1
; 6Ch -> default unopened FCB #2
; 80h -> count of bytes in command tail (inluding 1st space, not last cr)
; 81h -> all characters entered after the prog name, followed by a cr
;
; Parameter Block:
; dw      env_seg       ; segment of environment block
; dd      tail          ; segment:offset of command tail (counter)
; dd      fcb1          ; segment:offset of default FCB #1
; dd      fcb2          ; segment:offset of default FCB #2
;
dosshell:
        push eax
        push ecx
        push esi
        push edi
        push es
        cld
                                        ; Make sure allocated low mem won't
        mov ax,[psp_seg]                ;  be overwritten by DOS
        mov [v86r_es],ax                ; es=segment of the block
        mov eax,[lomembase]             ; eax=offset 1st byte in free low mem
        add eax,0fh                     
        and al,0f0h                     ; eax=idem, rounded up to para bound
        mov [lomembase],eax             ; Align lomembase on paragraph
                                        ;  boundary -> MCB at lomembase
        add eax,[code32a]
        shr eax,4                       ; ax=offset in paragraph units
        sub ax,[v86r_es]                ; ax=new block size in paragraphs
        mov [v86r_bx],ax                ; bx=new block size in paragraphs
        mov byte [v86r_ah],4ah          ; ah=4ah -> Modify allocated mem block
        mov al,21h
        int RMCALL_VECT
        test word [v86r_flags],1        ; Error?
;       jnz near .errexit               ; ?: Under CWSDPMI ah=4ah fails
        add dword [lomembase],16        ; Save MCB

        cmp esi,0                       ; Start only COMMAND.COM?
        jne short .startprog

        mov byte [cmdtail],0            ; No arguments for COMMAND.COM
                                        ; Copy segment addresses of:
        mov cx,[env_seg]                ;  environment (setting paramblock[0]
        mov [paramblock],cx             ;  to 0 also indicates that EXEC func
        mov ax,[psp_seg]                ;  must use copy of current env), 
        mov [paramblock+8],ax           ;  default unopened FCB #1 and
        mov [paramblock+12],ax          ;  default unopened FCB #2 of this
                                        ;  program to param block of 'child'.
        jmp short .get_comspec
.startprog:                             ; Start program at ds:esi
        mov ax,ds
        mov es,ax                       ; es=ds for scasb
        mov edi,esi                     ; Get string length of program name
        mov ecx,0ffffffffh              ; Search max 4GB
        mov al,0                        ; Scan for 0 (=end of string)
        repne scasb                     ; Compare al with es:edi
        not ecx                         ; When scasb stops, edi points 1 byte
        dec ecx                         ;  past the 0; that's 1 byte too far
                                        ; ecx=length of program name

        mov edi,cmdtail+1               ; Tail starts 1 byte past 'cmdtail'
        add edi,[presettail]            ; Save preset bytes in 'cmdtail'
        rep movsb                       ; Copy program's name to tail

        sub edi,cmdtail+1               ; edi=length of command tail (Don't
        mov ax,di                       ;  count command tail counter)
        mov [cmdtail],al                ; Length cannot be larger than 127

        mov cx,env_seg
        mov [paramblock],cx
        mov ax,[psp_seg]
        mov [paramblock+8],ax
        mov [paramblock+12],ax

.get_comspec:                           ; Get COMSPEC environment variable
        mov es,[envsel]
        mov al,0                        ; Scan for 0 (=end of string)
        xor edi,edi

.search_comspec:
        mov esi,comspec
        cmpsd                           ; Compare ds:esi with es:edi; 'COMS'
        jne short .end_of_string
        cmpsd                           ; Compare ds:esi with es:edi; 'PEC='
        je short .exec
        jmp short .errexit

.end_of_string:
        scasb                           ; Compare es:edi with al; scan until
        jne short .end_of_string        ;  end of environment string is found
        jmp short .search_comspec

.exec:
        mov word [v86r_ax],4b00h        ; ax=4b00h -> EXEC/Load and execute
        mov word [v86r_bx],paramblock   ; program 
        mov [v86r_dx],di
        mov [v86r_ds],cx                ; ds:dx=pointer to ASCIIz filename
        mov word [v86r_es],code32       ; es:bx=pointer to parameter block
        mov al,21h
        int RMCALL_VECT
        bt word [v86r_flags],0          ; Copy virtual CF value to CF
        jmp short .exit
.errexit:
        stc
.exit:
        pop es
        pop edi
        pop esi
        pop ecx
        pop eax
        ret

;
; Print '$' terminated string via DOS
; In:
;   ds = data32sel
;   edx = offset of '$' terminated string (string must be in low memory)
;
dosprint:
        push eax
        push edx
        add edx,[code32a]
        mov al,dl
        and eax,0fh
        shr edx,4
        mov [v86r_ds],dx
        mov [v86r_dx],ax
        mov byte [v86r_ah],9            ; ah=9 -> Print string at ds:dx
        mov al,21h
        int RMCALL_VECT
        pop edx
        pop eax
        ret

;
; Put 0 terminated string to es:edi
; In:
;   ds:esi = address of 0 terminated string
;   bl = character attribute
; Out:
;   edi = 1 byte past last written character (attribute)
;
putstr:
        push ecx
        push esi
        push eax
        push edi
        push es
        mov ax,ds                       ; First, get string length
        mov es,ax                       ; es=ds for scasb
        mov edi,esi
        mov ecx,0ffffffffh              ; Search max 4GB
        mov al,0                        ; Scan for 0 (=end of string)
        cld
        repne scasb                     ; Compare al with es:edi
        not ecx
        dec ecx                         ; When scasb stops, edi points 1 byte
                                        ;  past the 0; that's 1 byte too far
        pop es                          ; ecx=string length
        pop edi
        mov ah,bl                       ; bl=character attribute
.putchar:
        lodsb
        stosw
        loop .putchar
        pop eax
        pop esi
        pop ecx
        ret
