1053 lines
35 KiB
Text
1053 lines
35 KiB
Text
;===========================================================================
|
|
; Copyright (c) 1990-1999 Info-ZIP. All rights reserved.
|
|
;
|
|
; See the accompanying file LICENSE, version 1999-Oct-05 or later
|
|
; (the contents of which are also included in zip.h) for terms of use.
|
|
; If, for some reason, both of these files are missing, the Info-ZIP license
|
|
; also may be found at: ftp://ftp.cdrom.com/pub/infozip/license.html
|
|
;===========================================================================
|
|
; This is a 680x0 assembly language translation of the Info-ZIP source file
|
|
; deflate.c, by Paul Kienitz. The function longest_match is based in part
|
|
; on match.a by Carsten Steger, which in turn is partly based on match.s
|
|
; for 386 by Jean-loup Gailly and Kai Uwe Rommel. Mostly, however, this
|
|
; material is based on deflate.c, by Gailly, Rommel, and Igor Mandrichenko.
|
|
; This code is not commented very much; see deflate.c for comments that explain
|
|
; what the functions are doing.
|
|
;
|
|
; The symbols that can be used to select different versions are as follows:
|
|
;
|
|
; CPU020 if defined, use 68020 instructions always.
|
|
;
|
|
; CPUTEST if defined, check at runtime for CPU type. Another symbol
|
|
; specifying the platform-specific test must be used with this.
|
|
; If neither of these is defined, use 68000 instructions only.
|
|
; Runtime test is nonportable; it is different for each OS.
|
|
;
|
|
; AMIGA use Amiga-specific test for 68020, if CPUTEST defined. Also
|
|
; tells it that registers d0/a0/d1/a1 are not preserved by
|
|
; function calls. At present, if AMIGA is not defined, it
|
|
; causes functions to preserve all registers. ALL OF THIS CODE
|
|
; CURRENTLY ASSUMES THAT REGISTERS D2-D7/A2-A6 WILL BE PRESERVED
|
|
; BY ANY FUNCTIONS THAT IT CALLS.
|
|
;
|
|
; DYN_ALLOC should be defined here if it is defined for C source; tells us
|
|
; that big arrays are allocated instead of static.
|
|
;
|
|
; WSIZE must be defined as the same number used for WSIZE in the C
|
|
; source, and must be a power of two <= 32768. As elsewhere,
|
|
; the default value is 32768.
|
|
;
|
|
; INT16 define this if ints are 16 bits; otherwise 32 bit ints assumed.
|
|
;
|
|
; SMALL_MEM define this if it is defined in the C source; otherwise it uses
|
|
; the MEDIUM_MEM model. BIG_MEM and MMAP are *not* supported.
|
|
; The FULL_SEARCH option in deflate.c is also not supported.
|
|
;
|
|
; DEBUG activates some tracing output, as in the C source.
|
|
;
|
|
; QUADLONG this selects a different version of the innermost longest_match
|
|
; loop code for 68020 operations, comparing bytes four at a time
|
|
; instead of two at a time. It seems to be a tiny bit faster on
|
|
; average, but it's slower often enough that one can't generalize.
|
|
;
|
|
; This code currently assumes that function results are returned in D0 for
|
|
; all platforms. It assumes that args to functions are pushed onto the stack,
|
|
; last arg first. It also currently assumes that all C symbols have an
|
|
; underscore prepended when referenced from assembly.
|
|
|
|
IFND CPU020
|
|
IFND CPUTEST
|
|
CPU000 equ 1
|
|
ENDC
|
|
ENDC
|
|
|
|
; Use these macros for accessing variables of type int:
|
|
IFD INT16
|
|
MOVINT MACRO
|
|
move.w \1,\2
|
|
ENDM
|
|
CLRINT MACRO
|
|
clr.w \1
|
|
ENDM
|
|
INTSIZE equ 2
|
|
ELSE ; !INT16
|
|
MOVINT MACRO
|
|
move.l \1,\2
|
|
ENDM
|
|
CLRINT MACRO
|
|
clr.l \1
|
|
ENDM
|
|
INTSIZE equ 4
|
|
ENDC
|
|
|
|
IFD DYN_ALLOC
|
|
BASEPTR MACRO
|
|
move.l \1,\2
|
|
ENDM
|
|
ELSE
|
|
BASEPTR MACRO
|
|
lea \1,\2
|
|
ENDM
|
|
ENDC
|
|
|
|
; constants we use, many of them adjustable:
|
|
|
|
MAX_MATCH equ 258
|
|
MIN_MATCH equ 3
|
|
TOO_FAR equ 4096
|
|
IFND WSIZE
|
|
WSIZE equ 32768
|
|
ENDC
|
|
WMASK equ WSIZE-1
|
|
MAX_DIST equ WSIZE-MAX_MATCH-MIN_MATCH-1
|
|
MIN_LOOKAHEAD equ MAX_MATCH+MIN_MATCH+1
|
|
; IFD BIG_MEM ; NOT supported -- type Pos needs to be 32 bits
|
|
;HASH_BITS equ 15
|
|
; ELSE
|
|
IFD SMALL_MEM
|
|
HASH_BITS equ 13
|
|
ELSE
|
|
HASH_BITS equ 14 ; default -- MEDIUM_MEM
|
|
ENDC
|
|
; ENDC ; BIG_MEM
|
|
HASH_SIZE equ 1<<HASH_BITS
|
|
HASH_MASK equ HASH_SIZE-1
|
|
H_SHIFT equ (HASH_BITS+MIN_MATCH-1)/MIN_MATCH
|
|
B_SLOW equ 1
|
|
B_FAST equ 2
|
|
ZE_MEM equ 4
|
|
EOF equ -1
|
|
|
|
; struct config is defined by these offsets:
|
|
Good_length equ 0
|
|
Max_lazy equ 2
|
|
Nice_length equ 4
|
|
Max_chain equ 6
|
|
Sizeof_config equ 8
|
|
|
|
|
|
; external functions we call:
|
|
xref _ct_tally ; int ct_tally(int, int)
|
|
xref _flush_block ; unsigned long F(char *, unsigned long, int)
|
|
xref _ziperr ; void ziperr(int, char *)
|
|
xref _error ; void error(char *)
|
|
xref _calloc ; stdlib function: void *calloc(size_t, size_t)
|
|
xref _free ; stdlib function: void free(void *)
|
|
IFD DEBUG
|
|
xref _fputc ; stdio function: int fputc(int, FILE *)
|
|
xref _stderr ; pointer to FILE, which we pass to fputc
|
|
ENDC
|
|
|
|
; our entry points:
|
|
xdef _lm_init ; void lm_init(int level, unsigned short *flags)
|
|
xdef _lm_free ; void lm_free(void)
|
|
xdef _deflate ; void deflate(void) ...the big one
|
|
xdef _fill_window ; this line is just for debugging
|
|
|
|
|
|
; ============================================================================
|
|
; Here is where we have our global variables.
|
|
|
|
section deflatevars,data
|
|
|
|
; external global variables we reference:
|
|
xref _verbose ; signed int
|
|
xref _level ; signed int
|
|
xref _read_buf ; int (*read_buf)(char *, unsigned int)
|
|
|
|
; global variables we make available:
|
|
|
|
xdef _window
|
|
xdef _prev
|
|
xdef _head
|
|
xdef _window_size
|
|
xdef _block_start
|
|
xdef _strstart
|
|
|
|
IFD DYN_ALLOC
|
|
_prev: ds.l 1 ; pointer to calloc()'d unsigned short array
|
|
_head: ds.l 1 ; pointer to calloc()'d unsigned short array
|
|
_window: ds.l 1 ; pointer to calloc()'d unsigned char array
|
|
ELSE ; !DYN_ALLOC
|
|
_prev: ds.w WSIZE ; array of unsigned short
|
|
_head: ds.w HASH_SIZE ; array of unsigned short
|
|
_window: ds.b 2*WSIZE ; array of unsigned char
|
|
ENDC ; ?DYN_ALLOC
|
|
_window_size: ds.l 1 ; unsigned long
|
|
_block_start: ds.l 1 ; unsigned long
|
|
_strstart: ds.w INTSIZE/2 ; unsigned int
|
|
|
|
; Now here are our private variables:
|
|
|
|
IFD CPUTEST
|
|
is020: ds.w 1 ; bool: CPU type is '020 or higher
|
|
ENDC
|
|
ins_h: ds.w 1 ; unsigned short
|
|
sliding: ds.w 1 ; bool: the file is read a piece at a time
|
|
eofile: ds.w 1 ; bool: we have read in the end of the file
|
|
max_lazy_match: ds.w 1 ; unsigned short
|
|
lookahead: ds.w 1 ; unsigned short
|
|
|
|
; These are NOT DECLARED AS STATIC in deflate.c, but currently could be:
|
|
max_chain_len: ds.w 1 ; unsigned short (unsigned int in deflate.c)
|
|
prev_length: ds.w 1 ; unsigned short (unsigned int in deflate.c)
|
|
good_match: ds.w 1 ; unsigned short (unsigned int in deflate.c)
|
|
nice_match: ds.w 1 ; unsigned short (signed int in deflate.c)
|
|
match_start: ds.w 1 ; unsigned short (unsigned int in deflate.c)
|
|
|
|
; This array of struct config is a constant and could be in the code section:
|
|
config_table: dc.w 0,0,0,0 ; level 0: store uncompressed
|
|
dc.w 4,4,8,4 ; level 1: fastest, loosest compression
|
|
dc.w 4,5,16,8 ; level 2
|
|
dc.w 4,6,32,32 ; level 3: highest to use deflate_fast
|
|
dc.w 4,4,16,16 ; level 4: lowest to use lazy matches
|
|
dc.w 8,16,32,32 ; level 5
|
|
dc.w 8,16,128,128 ; level 6: the default level
|
|
dc.w 8,32,128,256 ; level 7
|
|
dc.w 32,128,258,1024 ; level 8
|
|
dc.w 32,258,258,4096 ; level 9: maximum compression, slow
|
|
|
|
|
|
;;CAL_SH MACRO ; macro for calling zcalloc()
|
|
;; IFD INT16
|
|
;; move.w #2,-(sp)
|
|
;; move.w #\1,-(sp)
|
|
;; jsr _zcalloc
|
|
;; addq #4,sp
|
|
;; ELSE
|
|
;; pea 2
|
|
;; pea \1
|
|
;; jsr _zcalloc
|
|
;; addq #8,sp
|
|
;; ENDC
|
|
;; ENDM
|
|
|
|
CAL_SH MACRO ; Okay, we're back to using regular calloc()...
|
|
pea 2
|
|
pea \1
|
|
jsr _calloc
|
|
addq #8,sp
|
|
ENDM
|
|
|
|
; ============================================================================
|
|
; And here we begin our functions. match_init is for internal use only:
|
|
|
|
section deflate,code
|
|
|
|
match_init:
|
|
IFD CPUTEST ; now check for platform type
|
|
IFD AMIGA ; Amiga specific test for '020 CPU:
|
|
xref _SysBase
|
|
NOLIST
|
|
INCLUDE 'exec/execbase.i'
|
|
LIST
|
|
|
|
clr.w is020 ; default value is 68000
|
|
move.l _SysBase,a0
|
|
btst #AFB_68020,AttnFlags+1(a0)
|
|
beq.s cheap
|
|
move.w #1,is020
|
|
cheap:
|
|
ELSE ; !AMIGA
|
|
|
|
FAIL Write an '020-detector for your system here!
|
|
; On the Macintosh, I believe GetEnvironment() provides the information.
|
|
|
|
ENDC ; AMIGA
|
|
ENDC ; CPUTEST
|
|
rts ; match_init consists only of rts if CPUTEST unset
|
|
|
|
|
|
; ============================================================================
|
|
; Here is longest_match(), the function that the rest of this was built up
|
|
; from, the hottest hot spot in the program and therefore the most heavily
|
|
; optimized. It has two different versions, one for '020 and higher CPUs, and
|
|
; one for 68000/68010. It can test at runtime which version to use if you
|
|
; create a test function in match_init for your platform. Currently such a
|
|
; test is implemented for the Amiga. It can also be assembled to use '000 or
|
|
; '020 code only.
|
|
|
|
Cur_Match equr d0 ; unsigned int, kept valid as long
|
|
Best_Len equr d1 ; unsigned int, kept valid as long
|
|
Scan_Start equr d3 ; pair of bytes
|
|
Scan_End equr d4 ; pair of bytes
|
|
Limit equr d5 ; unsigned int
|
|
Chain_Length equr d6 ; unsigned int
|
|
Scan_Test equr d7 ; counter, pair of bytes sometimes
|
|
Scan equr a0 ; pointer to unsigned char
|
|
Match equr a1 ; pointer to unsigned char
|
|
Prev_Address equr a2 ; pointer to unsigned short
|
|
Scan_Ini equr a3 ; pointer to unsigned char
|
|
Match_Ini equr a5 ; pointer to unsigned char
|
|
; Note: "pair of bytes" means the two low order bytes of the register in
|
|
; 68020 code, but means the lowest and third lowest bytes on the 68000.
|
|
IFD AMIGA
|
|
SAVEREGS reg d3-d7/a2/a3/a5 ; don't protect d0/d1/a0/a1
|
|
ELSE ; !AMIGA
|
|
SAVEREGS reg d1/d3-d7/a0-a3/a5 ; protect all except d0 return val
|
|
ENDC ; ?AMIGA
|
|
; d2, a4, a6 not used... on Amiga, a4 is used by small-data memory model
|
|
|
|
|
|
longest_match:
|
|
movem.l SAVEREGS,-(sp)
|
|
|
|
; setup steps common to byte and word versions:
|
|
IFD INT16
|
|
and.l #$0000FFFF,Cur_Match ; upper half must be zero!
|
|
; we use an and.l down here for the sake of ATSIGN/REGARGS.
|
|
moveq #0,Limit ; so adding to Scan_Ini works
|
|
ENDC
|
|
move.w max_chain_len,Chain_Length
|
|
move.w prev_length,Best_Len
|
|
MOVINT _strstart,Limit
|
|
BASEPTR _prev,Prev_Address
|
|
BASEPTR _window,Match_Ini
|
|
move.l Match_Ini,Scan_Ini
|
|
addq #MIN_MATCH,Match_Ini ; optimizes inner loop
|
|
add.l Limit,Scan_Ini
|
|
sub.w #MAX_DIST,Limit
|
|
bhi.s limit_ok
|
|
moveq #0,Limit
|
|
limit_ok:
|
|
cmp.w good_match,Best_Len
|
|
blo.s length_ok
|
|
lsr.w #2,Chain_Length
|
|
length_ok:
|
|
subq.w #1,Chain_Length
|
|
|
|
IFD CPUTEST
|
|
tst.w is020 ; can we use '020 stuff today?
|
|
bne WORD_match
|
|
ENDC
|
|
|
|
IFND CPU020
|
|
|
|
; for 68000 or 68010, use byte operations:
|
|
moveq #0,Scan_Start ; clear 2nd & 4th bytes, use 1st & 3rd
|
|
moveq #0,Scan_End ; likewise
|
|
moveq #0,Scan_Test ; likewise
|
|
move.b (Scan_Ini),Scan_Start
|
|
swap Scan_Start ; swap is faster than 8 bit shift
|
|
move.b 1(Scan_Ini),Scan_Start
|
|
move.b -1(Scan_Ini,Best_Len.w),Scan_End
|
|
swap Scan_End
|
|
move.b 0(Scan_Ini,Best_Len.w),Scan_End
|
|
bra.s bdo_scan
|
|
|
|
blong_loop:
|
|
move.b -1(Scan_Ini,Best_Len.w),Scan_End
|
|
swap Scan_End
|
|
move.b 0(Scan_Ini,Best_Len.w),Scan_End
|
|
|
|
bshort_loop:
|
|
add.w Cur_Match,Cur_Match ; assert value before doubling < 32K
|
|
IFNE 32768-WSIZE
|
|
and.w #(WMASK*2),Cur_Match
|
|
ENDC
|
|
move.w (Prev_Address,Cur_Match.l),Cur_Match
|
|
cmp.w Limit,Cur_Match
|
|
dbls Chain_Length,bdo_scan
|
|
bra return
|
|
|
|
bdo_scan:
|
|
move.l Match_Ini,Match
|
|
add.l Cur_Match,Match
|
|
move.b -MIN_MATCH-1(Match,Best_Len.w),Scan_Test
|
|
swap Scan_Test
|
|
move.b -MIN_MATCH(Match,Best_Len.w),Scan_Test
|
|
cmp.l Scan_Test,Scan_End
|
|
bne.s bshort_loop
|
|
move.b -MIN_MATCH(Match),Scan_Test
|
|
swap Scan_Test
|
|
move.b -MIN_MATCH+1(Match),Scan_Test
|
|
cmp.l Scan_Test,Scan_Start
|
|
bne.s bshort_loop
|
|
move.w #(MAX_MATCH-3),Scan_Test
|
|
lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop
|
|
|
|
bscan_loop:
|
|
cmp.b (Match)+,(Scan)+
|
|
dbne Scan_Test,bscan_loop
|
|
subq #1,Scan
|
|
|
|
sub.l Scan_Ini,Scan ; assert difference is 16 bits
|
|
cmp.w Best_Len,Scan
|
|
bls.s bshort_loop
|
|
MOVINT Scan,Best_Len
|
|
move.w Cur_Match,match_start
|
|
cmp.w nice_match,Best_Len
|
|
blo.s blong_loop
|
|
IFD CPUTEST
|
|
bra return
|
|
ENDC
|
|
|
|
ENDC ; !CPU020
|
|
|
|
IFND CPU000
|
|
MACHINE MC68020
|
|
|
|
; for 68020 or higher, use word operations even on odd addresses:
|
|
WORD_match:
|
|
move.w (Scan_Ini),Scan_Start
|
|
move.w -1(Scan_Ini,Best_Len.w),Scan_End
|
|
bra.s wdo_scan
|
|
|
|
wlong_loop:
|
|
move.w -1(Scan_Ini,Best_Len.w),Scan_End
|
|
|
|
wshort_loop:
|
|
and.w #WMASK,Cur_Match
|
|
move.w (Prev_Address,Cur_Match.w*2),Cur_Match ; '020 addressing mode
|
|
cmp.w Limit,Cur_Match
|
|
dbls Chain_Length,wdo_scan
|
|
bra.s return
|
|
|
|
wdo_scan:
|
|
move.l Match_Ini,Match
|
|
add.l Cur_Match,Match
|
|
cmp.w -MIN_MATCH-1(Match,Best_Len.w),Scan_End
|
|
bne.s wshort_loop
|
|
cmp.w -MIN_MATCH(Match),Scan_Start
|
|
bne.s wshort_loop
|
|
IFD QUADLONG
|
|
; By some measurements, this version of the code is a little tiny bit faster.
|
|
; But on some files it's slower. It probably pays off only when there are
|
|
; long match strings, and costs in the most common case of three-byte matches.
|
|
moveq #((MAX_MATCH-MIN_MATCH)/16),Scan_Test ; value = 15
|
|
lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop
|
|
|
|
wscan_loop:
|
|
cmp.l (Match)+,(Scan)+ ; test four bytes at a time
|
|
bne.s odd
|
|
cmp.l (Match)+,(Scan)+
|
|
bne.s odd
|
|
cmp.l (Match)+,(Scan)+
|
|
bne.s odd
|
|
cmp.l (Match)+,(Scan)+
|
|
dbne Scan_Test,wscan_loop ; '020 can cache a bigger loop
|
|
odd:
|
|
subq #4,Scan
|
|
subq #4,Match
|
|
cmp.b (Match)+,(Scan)+ ; find good bytes in bad longword
|
|
bne.s even
|
|
cmp.b (Match)+,(Scan)+
|
|
bne.s even
|
|
cmp.b (Match)+,(Scan)+
|
|
beq.s steven
|
|
even: subq #1,Scan
|
|
ELSE ; !QUADLONG
|
|
moveq #((MAX_MATCH-MIN_MATCH)/2),Scan_Test ; value = 127
|
|
lea MIN_MATCH(Scan_Ini),Scan ; offset optimizes inner loop
|
|
|
|
wscan_loop:
|
|
cmp.w (Match)+,(Scan)+
|
|
dbne Scan_Test,wscan_loop
|
|
subq #2,Scan
|
|
move.b -2(Match),Scan_Test
|
|
cmp.b (Scan),Scan_Test
|
|
bne.s steven
|
|
addq #1,Scan
|
|
ENDC ; ?QUADLONG
|
|
steven:
|
|
sub.l Scan_Ini,Scan ; assert: difference is 16 bits
|
|
cmp.w Best_Len,Scan
|
|
bls.s wshort_loop
|
|
MOVINT Scan,Best_Len
|
|
move.w Cur_Match,match_start
|
|
cmp.w nice_match,Best_Len
|
|
blo.s wlong_loop
|
|
|
|
MACHINE MC68000
|
|
ENDC ; !CPU000
|
|
|
|
return:
|
|
MOVINT Best_Len,d0 ; return value (upper half should be clear)
|
|
movem.l (sp)+,SAVEREGS
|
|
rts
|
|
|
|
|
|
; =============================================================================
|
|
; This is the deflate() function itself, our main entry point. It calls
|
|
; longest_match, above, and some outside functions. It is a hot spot, but not
|
|
; as hot as longest_match. It uses no special '020 code.
|
|
|
|
; ================== Several macros used in deflate() and later functions:
|
|
|
|
; Arg 1 is D-reg that new ins_h value is to be left in,
|
|
; arg 2 is the byte value to be hashed into it, which must not be the same reg
|
|
UP_HASH MACRO
|
|
move.w ins_h,\1
|
|
asl.w #H_SHIFT,\1
|
|
eor.b \2,\1
|
|
and.w #HASH_MASK,\1 ; ((ins_h << H_SHIFT) ^ c) & HASH_MASK
|
|
move.w \1,ins_h ; ins_h = that
|
|
ENDM
|
|
|
|
; Arg 1 is scratch A, arg 2 is scratch D
|
|
IN_STR MACRO
|
|
move.l Strst,\2
|
|
addq.w #MIN_MATCH-1,\2
|
|
move.b (Window,\2.l),\2 ; window[strstart + MIN_MATCH - 1]
|
|
UP_HASH Head,\2
|
|
add.l Head,Head ; assert upper word is zero before add
|
|
BASEPTR _head,\1
|
|
add.l Head,\1
|
|
move.w (\1),Head ; hash_head = head[ins_h]
|
|
move.w Strst,(\1) ; head[ins_h] = strstart
|
|
move.l Strst,\2
|
|
IFNE WSIZE-32768
|
|
and.w #WMASK,\2
|
|
ENDC
|
|
add.w \2,\2 ; masks implicitly when WSIZE == 32768
|
|
move.w Head,(Prev,\2.l) ; prev[str_start & WMASK] = hash_head
|
|
ENDM
|
|
|
|
; Arg 1 is bool (int) EOF flag, flush_block result is in d0, trashes d1/a0/a1
|
|
FLUSH_B MACRO
|
|
IFC '\1','#0'
|
|
CLRINT -(sp)
|
|
ELSE
|
|
MOVINT \1,-(sp)
|
|
ENDC
|
|
move.l _block_start,d0
|
|
blt.s nenu\@
|
|
move.l Window,a0
|
|
add.l d0,a0
|
|
bra.s nun\@
|
|
nenu\@: sub.l a0,a0 ; if block_start < 0, push NULL
|
|
nun\@: sub.l Strst,d0
|
|
neg.l d0
|
|
move.l d0,-(sp)
|
|
move.l a0,-(sp)
|
|
jsr _flush_block
|
|
lea 8+INTSIZE(sp),sp
|
|
ENDM
|
|
|
|
; This expands to nothing unless DEBUG is defined.
|
|
; Arg 1 is a byte to be trace-outputted -- if it is d0 it must be a valid int
|
|
TRACE_C MACRO
|
|
IFD DEBUG
|
|
cmp.w #1,_verbose+INTSIZE-2 ; test lower word only
|
|
ble.s qui\@
|
|
IFNC '\1','d0'
|
|
moveq #0,d0
|
|
move.b \1,d0
|
|
ENDC
|
|
move.l _stderr,-(sp)
|
|
MOVINT d0,-(sp)
|
|
jsr _fputc
|
|
addq #4+INTSIZE,sp
|
|
qui\@:
|
|
ENDC ; DEBUG
|
|
ENDM
|
|
|
|
; ================== Here are the register vars we use, and deflate() itself:
|
|
|
|
Window equr a2 ; cached address of window[]
|
|
Prev equr a3 ; cached address of prev[]
|
|
Strst equr d7 ; strstart cached as a longword
|
|
Look equr d6 ; lookahead cached as short
|
|
Head equr d5 ; local variable hash_head, short
|
|
PrevL equr d4 ; prev_length cached as short
|
|
MatchL equr d3 ; local variable match_length, unsigned short
|
|
Avail equr d2 ; local variable available_match, bool
|
|
PrevM equr a5 ; local variable prev_match, int in an A-reg
|
|
|
|
IFD AMIGA
|
|
DEFREGS reg d2-d7/a2/a3/a5
|
|
ELSE
|
|
DEFREGS reg d0-d7/a0/a2/a3/a5 ; play it safe, preserve all regs
|
|
ENDC
|
|
|
|
|
|
_deflate: ; first, setup steps common to deflate and deflate_fast:
|
|
movem.l DEFREGS,-(sp)
|
|
IFD INT16
|
|
moveq #0,Strst ; make sure strstart is valid as a long
|
|
ENDC
|
|
moveq #0,Head ; ditto for hash_head
|
|
MOVINT _strstart,Strst
|
|
move.w lookahead,Look
|
|
move.w prev_length,PrevL
|
|
BASEPTR _window,Window
|
|
BASEPTR _prev,Prev
|
|
MOVINT _level,d0
|
|
cmp.w #3,d0
|
|
ble deflate_fast
|
|
moveq #MIN_MATCH-1,MatchL
|
|
moveq #0,Avail
|
|
|
|
look_loop:
|
|
tst.w Look
|
|
beq last_tally
|
|
IN_STR a0,d0
|
|
move.w MatchL,PrevL
|
|
move.w match_start,PrevM
|
|
move.w #MIN_MATCH-1,MatchL
|
|
|
|
tst.w Head
|
|
beq.s no_new_match
|
|
cmp.w max_lazy_match,PrevL
|
|
bhs.s no_new_match
|
|
move.w Strst,d0
|
|
sub.w Head,d0
|
|
cmp.w #MAX_DIST,d0
|
|
bhi.s no_new_match
|
|
move.w PrevL,prev_length ; longest_match reads these variables
|
|
MOVINT Strst,_strstart
|
|
MOVINT Head,d0 ; parm for longest_match
|
|
bsr longest_match ; sets match_start
|
|
cmp.w Look,d0 ; does length exceed valid data?
|
|
bls.s stml
|
|
move.w Look,d0
|
|
stml: move.w d0,MatchL ; valid length of match
|
|
cmp.w #MIN_MATCH,MatchL ; is the match only three bytes?
|
|
bne.s no_new_match
|
|
move.w match_start,d0
|
|
sub.w Strst,d0
|
|
cmp.w #-TOO_FAR,d0
|
|
bge.s no_new_match
|
|
moveq #MIN_MATCH-1,MatchL ; mark the current match as no good
|
|
|
|
no_new_match:
|
|
cmp.w #MIN_MATCH,PrevL
|
|
blo literal
|
|
cmp.w MatchL,PrevL
|
|
blo literal
|
|
; CHECK_MATCH Strst-1,PrevM,PrevL
|
|
MOVINT Strst,_strstart ; ct_tally reads this variable
|
|
move.l PrevL,d0
|
|
subq.w #MIN_MATCH,d0
|
|
MOVINT d0,-(sp)
|
|
move.l Strst,d0
|
|
sub.w PrevM,d0
|
|
subq.w #1,d0
|
|
MOVINT d0,-(sp)
|
|
jsr _ct_tally ; sets d0 true if we have to flush
|
|
addq #2*INTSIZE,sp
|
|
subq.w #3,PrevL ; convert for dbra (prev_length - 2)
|
|
sub.w PrevL,Look
|
|
subq.w #2,Look
|
|
insertmatch:
|
|
addq.w #1,Strst
|
|
IN_STR a0,d1 ; don't clobber d0
|
|
dbra PrevL,insertmatch
|
|
moveq #0,Avail
|
|
moveq #0,PrevL ; not needed?
|
|
moveq #MIN_MATCH-1,MatchL
|
|
addq.w #1,Strst
|
|
tst.w d0
|
|
beq refill
|
|
FLUSH_B #0
|
|
move.l Strst,_block_start
|
|
bra.s refill
|
|
|
|
literal:
|
|
tst.w Avail
|
|
bne.s yeslit
|
|
moveq #1,Avail
|
|
bra.s skipliteral
|
|
yeslit: TRACE_C <-1(Window,Strst.l)>
|
|
MOVINT Strst,_strstart ; ct_tally reads this variable
|
|
moveq #0,d0
|
|
move.b -1(Window,Strst.l),d0
|
|
MOVINT d0,-(sp)
|
|
CLRINT -(sp)
|
|
jsr _ct_tally
|
|
addq #2*INTSIZE,sp
|
|
tst.w d0
|
|
beq.s skipliteral
|
|
FLUSH_B #0
|
|
move.l Strst,_block_start
|
|
skipliteral:
|
|
addq.w #1,Strst
|
|
subq.w #1,Look
|
|
|
|
refill:
|
|
cmp.w #MIN_LOOKAHEAD,Look
|
|
bhs look_loop
|
|
bsr fill_window
|
|
bra look_loop
|
|
|
|
last_tally:
|
|
tst.w Avail
|
|
beq last_flush
|
|
MOVINT Strst,_strstart ; ct_tally reads this variable
|
|
moveq #0,d0
|
|
move.b -1(Window,Strst.l),d0
|
|
MOVINT d0,-(sp)
|
|
CLRINT -(sp)
|
|
jsr _ct_tally
|
|
addq #2*INTSIZE,sp
|
|
last_flush:
|
|
FLUSH_B #1
|
|
bra deflate_exit
|
|
|
|
; ================== This is another version used for low compression levels:
|
|
|
|
deflate_fast:
|
|
moveq #0,MatchL
|
|
moveq #MIN_MATCH-1,PrevL
|
|
flook_loop:
|
|
tst.w Look
|
|
beq flast_flush
|
|
|
|
IN_STR a0,d0
|
|
tst.w Head
|
|
beq.s fno_new_match
|
|
move.w Strst,d0
|
|
sub.w Head,d0
|
|
cmp.w #MAX_DIST,d0
|
|
bhi.s fno_new_match
|
|
move.w PrevL,prev_length ; longest_match reads these variables
|
|
MOVINT Strst,_strstart
|
|
MOVINT Head,d0 ; parm for longest_match
|
|
bsr longest_match ; sets match_start
|
|
cmp.w Look,d0 ; does length exceed valid data?
|
|
bls.s fstml
|
|
move.w Look,d0
|
|
fstml: move.w d0,MatchL ; valid length of match
|
|
|
|
fno_new_match:
|
|
cmp.w #MIN_MATCH,MatchL
|
|
blo fliteral
|
|
; CHECK_MATCH Strst,match_start,MatchL
|
|
MOVINT Strst,_strstart ; ct_tally reads this variable
|
|
move.l MatchL,d0
|
|
subq.w #MIN_MATCH,d0
|
|
MOVINT d0,-(sp)
|
|
move.l Strst,d0
|
|
sub.w match_start,d0
|
|
MOVINT d0,-(sp)
|
|
jsr _ct_tally ; sets d0 true if we have to flush
|
|
addq #2*INTSIZE,sp
|
|
sub.w MatchL,Look
|
|
cmp.w max_lazy_match,MatchL
|
|
bhi ftoolong
|
|
subq.w #2,MatchL
|
|
finsertmatch:
|
|
addq.w #1,Strst
|
|
IN_STR a0,d1 ; preserve d0
|
|
dbra MatchL,finsertmatch
|
|
moveq #0,MatchL ; not needed?
|
|
addq.w #1,Strst
|
|
bra.s flushfill
|
|
|
|
ftoolong:
|
|
add.w MatchL,Strst
|
|
moveq #0,MatchL
|
|
moveq #0,d1 ; preserve d0
|
|
move.b (Window,Strst.l),d1
|
|
move.w d1,ins_h
|
|
; My assembler objects to passing <1(Window,Strst.l)> directly to UP_HASH...
|
|
move.b 1(Window,Strst.l),Avail ; Avail is not used in deflate_fast
|
|
UP_HASH d1,Avail ; preserve d0
|
|
IFNE MIN_MATCH-3
|
|
FAIL needs to UP_HASH another MIN_MATCH-3 times, but with what arg?
|
|
ENDC
|
|
bra.s flushfill
|
|
|
|
fliteral:
|
|
TRACE_C <(Window,Strst.l)>
|
|
MOVINT Strst,_strstart ; ct_tally reads this variable
|
|
moveq #0,d0
|
|
move.b (Window,Strst.l),d0
|
|
MOVINT d0,-(sp)
|
|
CLRINT -(sp)
|
|
jsr _ct_tally ; d0 set if we need to flush
|
|
addq #2*INTSIZE,sp
|
|
addq.w #1,Strst
|
|
subq.w #1,Look
|
|
|
|
flushfill:
|
|
tst.w d0
|
|
beq.s frefill
|
|
FLUSH_B #0
|
|
move.l Strst,_block_start
|
|
frefill:
|
|
cmp.w #MIN_LOOKAHEAD,Look
|
|
bhs flook_loop
|
|
bsr fill_window
|
|
bra flook_loop
|
|
|
|
flast_flush:
|
|
FLUSH_B #1 ; sets our return value
|
|
|
|
deflate_exit:
|
|
MOVINT Strst,_strstart ; save back cached values
|
|
move.w PrevL,prev_length
|
|
move.w Look,lookahead
|
|
movem.l (sp)+,DEFREGS
|
|
rts
|
|
|
|
|
|
; =========================================================================
|
|
; void fill_window(void) calls the input function to refill the sliding
|
|
; window that we use to find substring matches in.
|
|
|
|
More equr Head ; local variable in fill_window
|
|
WindTop equr Prev ; local variable used for sliding
|
|
SlidIx equr PrevL ; local variable used for sliding
|
|
|
|
IFD AMIGA
|
|
FWREGS reg d2-d5/a2-a6 ; does NOT include Look and Strst
|
|
ELSE
|
|
FWREGS reg d1-d5/a0-a6 ; likewise
|
|
ENDC
|
|
; all registers available to be clobbered by the sliding operation:
|
|
; we exclude More, WindTop, SlidIx, Look, Strst, Window, a4 and a7.
|
|
SPAREGS reg d0-d3/a0-a1/a5-a6
|
|
SPCOUNT equ 8 ; number of registers in SPAREGS
|
|
|
|
|
|
_fill_window: ; C-callable entry point
|
|
movem.l Strst/Look,-(sp)
|
|
IFD INT16
|
|
moveq #0,Strst ; Strst must be valid as a long
|
|
ENDC
|
|
MOVINT _strstart,Strst
|
|
move.w lookahead,Look
|
|
BASEPTR _window,Window
|
|
bsr.s fill_window
|
|
MOVINT Strst,_strstart
|
|
move.w Look,lookahead
|
|
movem.l (sp)+,Strst/Look
|
|
rts
|
|
|
|
; strstart, lookahead, and window must be cached in Strst, Look, and Window:
|
|
fill_window: ; asm-callable entry point
|
|
movem.l FWREGS,-(sp)
|
|
tst.w eofile ; we put this up here for speed
|
|
bne fwdone
|
|
and.l #$FFFF,Look ; make sure Look is valid as long
|
|
fw_refill:
|
|
move.l _window_size,More ; <= 64K
|
|
sub.l Look,More
|
|
sub.l Strst,More ; Strst is already valid as long
|
|
cmp.w #EOF,More
|
|
bne.s notboundary
|
|
subq.w #1,More
|
|
bra checkend
|
|
|
|
notboundary:
|
|
tst.w sliding
|
|
beq checkend
|
|
cmp.w #WSIZE+MAX_DIST,Strst
|
|
blo checkend
|
|
IFGT 32768-WSIZE
|
|
lea WSIZE(Window),WindTop ; WindTop is aligned when Window is
|
|
ELSE
|
|
move.l Window,WindTop
|
|
add.l #WSIZE,WindTop
|
|
ENDC
|
|
move.l Window,d0
|
|
and.w #3,d0
|
|
beq.s isaligned
|
|
subq.w #1,d0
|
|
align: move.b (WindTop)+,(Window)+ ; copy up to a longword boundary
|
|
dbra d0,align
|
|
isaligned:
|
|
; This is faster than a simple move.l (WindTop)+,(Window)+ / dbra loop:
|
|
move.w #(WSIZE-1)/(4*SPCOUNT),SlidIx
|
|
slide: movem.l (WindTop)+,SPAREGS ; copy, 32 bytes at a time!
|
|
movem.l SPAREGS,(Window) ; a slight overshoot doesn't matter.
|
|
lea 4*SPCOUNT(Window),Window ; can't use (aN)+ as movem.l dest
|
|
dbra SlidIx,slide
|
|
BASEPTR _window,Window ; restore cached value
|
|
sub.w #WSIZE,match_start
|
|
sub.w #WSIZE,Strst
|
|
sub.l #WSIZE,_block_start
|
|
add.w #WSIZE,More
|
|
BASEPTR _head,a0
|
|
move.w #HASH_SIZE-1,d0
|
|
fixhead:
|
|
move.w (a0),d1
|
|
sub.w #WSIZE,d1
|
|
bpl.s headok
|
|
moveq #0,d1
|
|
headok: move.w d1,(a0)+
|
|
dbra d0,fixhead
|
|
BASEPTR _prev,a0
|
|
move.w #WSIZE-1,d0
|
|
fixprev:
|
|
move.w (a0),d1
|
|
sub.w #WSIZE,d1
|
|
bpl.s prevok
|
|
moveq #0,d1
|
|
prevok: move.w d1,(a0)+
|
|
dbra d0,fixprev
|
|
TRACE_C #'.'
|
|
|
|
checkend: ; assert eofile is false
|
|
MOVINT More,-(sp) ; assert More's upper word is zero
|
|
move.l Strst,d0
|
|
add.w Look,d0
|
|
add.l Window,d0
|
|
move.l d0,-(sp)
|
|
move.l _read_buf,a0
|
|
jsr (a0) ; refill the upper part of the window
|
|
addq #4+INTSIZE,sp
|
|
tst.w d0
|
|
beq.s iseof
|
|
cmp.w #EOF,d0
|
|
beq.s iseof
|
|
add.w d0,Look
|
|
cmp.w #MIN_LOOKAHEAD,Look
|
|
blo fw_refill ; eofile is still false
|
|
|
|
bra.s fwdone
|
|
iseof: move.w #1,eofile
|
|
fwdone: movem.l (sp)+,FWREGS
|
|
rts
|
|
|
|
|
|
; =========================================================================
|
|
; void lm_free(void) frees dynamic arrays in the DYN_ALLOC version.
|
|
|
|
xdef _lm_free ; the entry point
|
|
|
|
_lm_free:
|
|
IFD DYN_ALLOC
|
|
move.l _window,d0
|
|
beq.s lf_no_window
|
|
move.l d0,-(sp)
|
|
jsr _free
|
|
addq #4,sp
|
|
clr.l _window
|
|
lf_no_window:
|
|
move.l _prev,d0
|
|
beq.s lf_no_prev
|
|
move.l d0,-(sp)
|
|
jsr _free
|
|
move.l _head,(sp) ; reuse the same stack arg slot
|
|
jsr _free
|
|
addq #4,sp
|
|
clr.l _prev
|
|
clr.l _head
|
|
lf_no_prev:
|
|
ENDC
|
|
rts
|
|
|
|
; ============================================================================
|
|
; void lm_init(int pack_level, unsigned short *flags) allocates dynamic arrays
|
|
; if any, and initializes all variables so that deflate() is ready to go.
|
|
|
|
xdef _lm_init ; the entry point
|
|
|
|
Level equr d2
|
|
;Window equr a2 ; as in deflate()
|
|
IFD AMIGA
|
|
INIREGS reg d2/a2
|
|
ELSE
|
|
INIREGS reg d0-d2/a0-a1
|
|
ENDC
|
|
|
|
_lm_init:
|
|
MOVINT 4(sp),d0
|
|
move.l 4+INTSIZE(sp),a0
|
|
movem.l INIREGS,-(sp)
|
|
move.w d0,Level
|
|
cmp.w #1,Level
|
|
blt.s levelerr
|
|
bgt.s try9
|
|
bset.b #B_FAST,1(a0)
|
|
try9: cmp.w #9,Level
|
|
bgt.s levelerr
|
|
blt.s levelok
|
|
bset.b #B_SLOW,1(a0)
|
|
bra.s levelok
|
|
levelerr:
|
|
pea level_message
|
|
jsr _error ; never returns
|
|
levelok:
|
|
clr.w sliding
|
|
tst.l _window_size
|
|
bne.s gotawindowsize
|
|
move.w #1,sliding
|
|
move.l #2*WSIZE,_window_size
|
|
gotawindowsize:
|
|
|
|
BASEPTR _window,Window
|
|
IFD DYN_ALLOC
|
|
move.l Window,d0 ; fake tst.l
|
|
bne.s gotsomewind
|
|
CAL_SH WSIZE
|
|
move.l d0,Window
|
|
move.l d0,_window
|
|
bne.s gotsomewind
|
|
pea window_message
|
|
MOVINT #ZE_MEM,-(sp)
|
|
jsr _ziperr ; never returns
|
|
gotsomewind:
|
|
tst.l _prev
|
|
bne.s gotsomehead
|
|
CAL_SH WSIZE
|
|
move.l d0,_prev
|
|
beq.s nohead
|
|
CAL_SH HASH_SIZE
|
|
move.l d0,_head
|
|
bne.s gotfreshhead ; newly calloc'd memory is zeroed
|
|
nohead: pea hash_message
|
|
MOVINT #ZE_MEM,-(sp)
|
|
jsr _ziperr ; never returns
|
|
gotsomehead:
|
|
ENDC ; DYN_ALLOC
|
|
|
|
move.w #(HASH_SIZE/2)-1,d0 ; two shortwords per loop
|
|
BASEPTR _head,a0
|
|
wipeh: clr.l (a0)+
|
|
dbra d0,wipeh
|
|
gotfreshhead:
|
|
move.l Level,d0
|
|
IFEQ Sizeof_config-8
|
|
asl.l #3,d0
|
|
ELSE
|
|
mulu #Sizeof_config,d0
|
|
ENDC
|
|
lea config_table,a0
|
|
add.l d0,a0
|
|
move.w Max_lazy(a0),max_lazy_match
|
|
move.w Good_length(a0),good_match
|
|
move.w Nice_length(a0),nice_match
|
|
move.w Max_chain(a0),max_chain_len
|
|
CLRINT _strstart
|
|
clr.l _block_start
|
|
bsr match_init
|
|
|
|
clr.w eofile
|
|
MOVINT #WSIZE,-(sp) ; We read only 32K because lookahead is short
|
|
move.l Window,-(sp) ; even when int size is long, as if deflate.c
|
|
move.l _read_buf,a0 ; were compiled with MAXSEG_64K defined.
|
|
jsr (a0)
|
|
addq #4+INTSIZE,sp
|
|
move.w d0,lookahead
|
|
beq.s noread
|
|
cmp.w #EOF,d0
|
|
bne.s irefill
|
|
noread: move.w #1,eofile
|
|
clr.w lookahead
|
|
bra.s init_done
|
|
|
|
irefill:
|
|
move.w lookahead,d0
|
|
cmp.w #MIN_LOOKAHEAD,d0
|
|
bhs.s hashify
|
|
bsr _fill_window ; use the C-callable version
|
|
hashify:
|
|
clr.w ins_h
|
|
moveq #MIN_MATCH-2,d0
|
|
hash1: move.b (Window)+,d1
|
|
UP_HASH Level,d1
|
|
dbra d0,hash1
|
|
|
|
init_done:
|
|
movem.l (sp)+,INIREGS
|
|
rts
|
|
|
|
; strings for error messages:
|
|
hash_message dc.b 'hash table allocation',0
|
|
window_message dc.b 'window allocation',0
|
|
level_message dc.b 'bad pack level',0
|
|
|
|
end
|