; This source code dates from the DOS era. That era has passed, but the program is still helpful for studying assembly language. Assembly is a basic programming language that most programmers can grasp, even without researching it in depth.
;-----------------------------------------------------------------------
; Clean --- a utility to filter text files.
;
; The program removes all control codes except line feeds, carriage
; returns and form feeds, strips the high bit from every character,
; and expands tabs.  It can be used to make a WordStar file acceptable
; to other screen or line editors, and vice versa.
;
; Version 1.1   Dec      blocking/deblocking
; Version 1.0   Nov 83
;
; Copyright (c) 1983 by Ray Duncan
;-----------------------------------------------------------------------
        name    Clean
        page    55,132
        title   ' Clean---Filter text file '

; ASCII control characters recognised by the filter
cr      equ     0Dh             ; carriage return
lf      equ     0Ah             ; line feed
ff      equ     0Ch             ; form feed
eof     equ     1Ah             ; end-of-file marker
tab     equ     09h             ; horizontal tab

command equ     80h             ; offset of the command-tail buffer

blksize equ     1024            ; bytes per blocking/deblocking buffer
CSEG segment para public ' CODE '
Assume Cs:cseg,ds:data,es:data,ss:stack
;-----------------------------------------------------------------------
; Clean -- program entry point (far procedure entered from PC-DOS).
;
; Flow:  parse the input file name, derive the output file name, open
;        both files, filter input to output, close the files and print
;        one status message before returning.
;
; Per-character rules (applied after the high bit is stripped):
;   >= 20h          copied; column count incremented
;   CR, LF, FF      copied; column count reset to zero
;   TAB             replaced by spaces up to the next multiple of 8
;   EOF             copied, output flushed, files closed, success
;   other < 20h     discarded
;
; Exit:  DX = offset of the message printed with DOS function 9, then
;        a far RET through the DS:0000 address pushed at entry.
;-----------------------------------------------------------------------
Clean   proc    far             ; entry point from PC-DOS
        push    ds              ; save DS:0000 for final
        xor     ax,ax           ; return to PC-DOS
        push    ax
        mov     ax,data         ; make our data segment
        mov     es,ax           ; addressable via ES register
        call    infile          ; get path and file spec.
                                ; for input file
        mov     ax,es           ; set DS=ES for remainder
        mov     ds,ax           ; (MOV leaves infile's carry intact)
        jnc     clean1          ; jump, got acceptable name
        mov     dx,offset msg4  ; missing or illegal filespec,
        jmp     clean9          ; print error message and exit.

clean1: call    outfile         ; set up output file name
        call    Open_input      ; now try to open input file
        jnc     clean2          ; jump, opened input ok
        mov     dx,offset msg1  ; open of input file failed,
        jmp     clean9          ; print error msg and exit.

clean2:
        call    Open_output     ; try to open output file.
        jnc     clean25         ; jump, opened ok
        call    Close_input     ; do not leave the input handle open
        mov     dx,offset msg2  ; open of output file failed,
        jmp     clean9          ; print error message and exit.

clean25:                        ; set up buffers
        call    init_buffs
        call    sign_on         ; print ident and file names

                                ; files successfully opened,
clean3:                         ; now filter the file.
        call    Get_char        ; read 1 character from input.
        and     al,07fh         ; strip off the high bit
        cmp     al,20h          ; is it a control code?
        jae     clean4          ; no, write it to new file
                                ; yes, it is a control code
        cmp     al,eof          ; is it end of file marker?
        je      clean6          ; yes, jump to close files.
        cmp     al,tab          ; is it a tab command?
        jz      clean5          ; yes, jump to special processing.
        cmp     al,cr           ; if control code other than
        je      clean35         ; tab or end-of-file mark, throw
        cmp     al,ff           ; it away unless it is a
        je      clean35         ; form feed, carriage return,
        cmp     al,lf           ; or line feed.
        jne     clean3

clean35:                        ; if it is one of those three,
        mov     column,0        ; incidentally initialize
        jmp     clean45         ; column count for tab processor.

clean4:                         ; count printable chars. sent.
        inc     column

clean45:                        ; write this character to
        call    Put_char        ; output file,
        jnc     clean3          ; if CY not set, write was
                                ; ok so go get next char.
clean47:
        call    Close_input     ; if CY set, disk is full
        call    Close_output    ; so close files and exit
        mov     dx,offset msg5  ; with error message.
        jmp     clean9

clean5:                         ; process tab character
        mov     ax,column       ; AX = column mod 8.  A mask replaces
        and     ax,7            ; the signed divide, which went wrong
        mov     cx,8            ; once the count reached 32768.
        sub     cx,ax           ; CX = spaces to next tab stop (1..8)
        add     column,cx       ; update column pointer.
clean55:
        push    cx              ; Put_char does not preserve CX
        mov     al,20h          ; send one space
        call    Put_char
        pop     cx              ; restore space count (POP keeps CY)
        jc      clean47         ; jump if disk is full
        loop    clean55
        jmp     short clean3    ; get next character

clean6:                         ; end of file detected, AL = eof
        call    Put_char        ; write end-of-file marker,
        jc      clean47         ; jump if disk is full
        call    flush_buffs     ; write remaining data to disk
        jc      clean47         ; if CY set, disk is full
                                ; otherwise file was written ok
        call    Close_input     ; close input and output
        call    Close_output    ; files.
        mov     dx,offset msg3  ; addr of success message,

clean9:                         ; print message and return
        mov     ah,9            ; control to PC-DOS
        int     21h
        ret
Clean   endp
;-----------------------------------------------------------------------
; infile -- copy the first blank-delimited word of the command tail
;           into input_name.
;
; In:    DS:command = command tail (length byte, text, CR)
;        ES         = segment holding input_name
; Out:   CY = 0     a name was copied
;        CY = 1     no name present, or name longer than infile_max
; Uses:  AL, CX, SI, DI, flags
;
; No terminating zero is stored; the routine relies on input_name
; being zero-filled beforehand.
; NOTE(review): infile_max assumes input_name is 64 bytes long, which
; matches the 64-byte copy performed by outfile -- confirm against the
; buffer definition.
;-----------------------------------------------------------------------
infile_max equ  63              ; longest name that still leaves a 0

infile  proc    near            ; process name of input file
        mov     si,offset command       ; DS:SI <- addr command line
        mov     di,offset input_name    ; ES:DI <- addr filespec buffer
        mov     cx,infile_max   ; CX = room left in the buffer
        cld
        lodsb                   ; any command line present?
        or      al,al           ; return error status if not.
        jz      infile4
infile1:                        ; scan over leading blanks
        lodsb                   ; to file name
        cmp     al,cr           ; if we hit carriage return
        jz      infile4         ; filename is missing.
        cmp     al,20h          ; is this a blank?
        jz      infile1         ; if so keep scanning.
infile2:                        ; found the name,
        jcxz    infile4         ; buffer full: reject instead of overrun
        stosb                   ; move last char. to output
        dec     cx              ; file name buffer.
        lodsb                   ; check next character, found
        cmp     al,cr           ; carriage return yet?
        je      infile3         ; yes, exit with success code
        cmp     al,20h          ; is this a blank?
        jne     infile2         ; if not keep moving chars.
infile3:                        ; exit with carry = 0
        clc                     ; for success flag
        ret
infile4:                        ; exit with carry = 1
        stc                     ; for error flag
        ret
infile  endp
;-----------------------------------------------------------------------
; outfile -- build output_name from input_name.
;
; Copies 64 bytes of input_name to output_name, then overwrites the
; text from the first '.' (or from the terminating zero when there is
; no '.') with the 5 bytes at outfile_ext.
;
; In:    DS = ES = segment holding the name buffers
; Uses:  AL, CX, SI, DI, flags.  CX is zero on return.
;
; The scan stops at the FIRST '.', so a '.' inside a directory part of
; the path also ends the name there.
;-----------------------------------------------------------------------
outfile proc    near            ; set up path and file
        cld                     ; name for output file.
        mov     cx,64           ; length to move
        mov     si,offset input_name    ; source addr
        mov     di,offset output_name   ; dest addr
        rep movsb               ; transfer the string
        mov     di,offset output_name
outfile1:                       ; scan string looking for
        mov     al,[di]         ; "." marking start of extension
        or      al,al           ; or zero byte marking name end.
        jz      outfile2        ; if either is found, jump.
        cmp     al,'.'
        je      outfile2
        inc     di              ; bump string pointer, loop
        jmp     outfile1        ; if neither '.' nor zero found.
outfile2:                       ; found zero or '.', force the
                                ; extension of the output file
        mov     si,offset outfile_ext
        mov     cx,5            ; 5 bytes of extension data
        rep movsb
        ret                     ; back to caller
outfile endp
;-----------------------------------------------------------------------
; Open_input -- open the file named by input_name for reading.
;
; In:    DS = segment holding input_name
; Out:   input_handle = AX as returned by DOS
;        CY set if DOS reported an error
; Uses:  AX, DX, flags
;-----------------------------------------------------------------------
Open_input proc near
        mov     ax,3d00h        ; AH = 3Dh (open), AL = 0 (read only)
        mov     dx,offset input_name    ; DS:DX = addr filename
        int     21h
        mov     input_handle,ax ; keep the handle; MOV preserves CY
        ret
Open_input endp
;-----------------------------------------------------------------------
; Open_output -- create (or truncate) the file named by output_name.
;
; In:    DS = segment holding output_name
; Out:   output_handle = AX as returned by DOS
;        CY set if DOS reported an error
; Uses:  AX, CX, DX, flags
;
; DOS function 3Ch takes the new file's attribute in CX.  It used to be
; left at whatever value earlier code happened to leave there; it is
; now set explicitly.  The former AL = 1 load was dropped because AL is
; not an input to this function.
;-----------------------------------------------------------------------
Open_output proc near           ; open output file
        mov     dx,offset output_name   ; DS:DX = addr filename
        xor     cx,cx           ; CX = 0: normal file attribute
        mov     ah,3ch          ; function 3Ch = make or
        int     21h             ; truncate existing file
                                ; handle returned in AX
        mov     output_handle,ax        ; save it for later.
        ret                     ; return CY = true if error
Open_output endp
;-----------------------------------------------------------------------
; Close_input -- close the handle stored in input_handle.
; Uses:  AH, BX, plus whatever DOS function 3Eh returns.
;-----------------------------------------------------------------------
Close_input proc near
        mov     ah,3eh          ; DOS function 3Eh
        mov     bx,input_handle ; BX = handle to close
        int     21h
        ret
Close_input endp
;-----------------------------------------------------------------------
; Close_output -- close the handle stored in output_handle.
; Uses:  AH, BX, plus whatever DOS function 3Eh returns.
;-----------------------------------------------------------------------
Close_output proc near
        mov     ah,3eh          ; DOS function 3Eh
        mov     bx,output_handle        ; BX = handle to close
        int     21h
        ret
Close_output endp
;-----------------------------------------------------------------------
; Get_char -- fetch the next byte from the input blocking buffer.
;
; Out:   AL = byte at input_buffer[input_ptr]; input_ptr advanced by 1
; Uses:  AX, BX, flags (and everything Read_block uses on a refill)
;
; When input_ptr has reached blksize the buffer is refilled through
; Read_block first and reading restarts at offset 0.
;-----------------------------------------------------------------------
Get_char proc near
        mov     bx,input_ptr
        cmp     bx,blksize      ; whole buffer consumed?
        je      get_char_refill ; yes, fetch another block first
get_char_fetch:
        mov     al,[input_buffer+bx]
        inc     bx
        mov     input_ptr,bx
        ret
get_char_refill:
        call    Read_block
        xor     bx,bx           ; restart at the head of the buffer
        jmp     short get_char_fetch
Get_char endp
;-----------------------------------------------------------------------
; Put_char -- append AL to the output blocking buffer.
;
; In:    AL = byte to store
; Out:   CY = 0  byte stored (and block written, if one was due)
;        CY = 1  the buffer filled and Write_block reported failure
; Uses:  BX, flags; also AX, CX, DX when a block is written
;-----------------------------------------------------------------------
Put_char proc near              ; put one character into output buffer
        mov     bx,output_ptr
        mov     [output_buffer+bx],al
        inc     bx
        mov     output_ptr,bx
        cmp     bx,blksize      ; buffer full yet?
        jne     put_char1       ; no, jump
        call    Write_block     ; yes, write the block
        ret                     ; return CY as status code
put_char1:
        clc                     ; return CY clear for OK status
        ret
Put_char endp
;-----------------------------------------------------------------------
; Read_block -- refill input_buffer from the input file.
;
; Requests blksize bytes through DOS function 3Fh.  A DOS error is
; treated as a zero-length read.  When fewer than blksize bytes arrive,
; an eof byte is stored just past the data so the filter loop sees an
; end-of-file marker.
;
; Out:   input_ptr = 0, AX = 0
; Uses:  AX, BX, CX, DX, flags
;-----------------------------------------------------------------------
Read_block proc near
        mov     bx,input_handle ; read a block of input
        mov     cx,blksize
        mov     dx,offset input_buffer
        mov     ah,3fh
        int     21h
        jnc     read_block1     ; jump if no error status
        mov     ax,0            ; simulate a zero length read if error
read_block1:
        cmp     ax,blksize      ; was a full buffer read in?
        je      read_block2     ; yes, jump
        mov     bx,ax           ; no, store end-of-file mark
        mov     byte ptr [input_buffer+bx],eof
read_block2:
        xor     ax,ax           ; initialize input buffer pointer
        mov     input_ptr,ax
        ret
Read_block endp
;-----------------------------------------------------------------------
; Write_block -- write one full block (blksize bytes) of output_buffer
;                to the output file and reset output_ptr to 0.
;
; Out:   CY = 0  all blksize bytes were written
;        CY = 1  DOS reported an error or wrote fewer bytes
;        BX = 0, output_ptr = 0 in both cases
; Uses:  AX, BX, CX, DX, flags
;-----------------------------------------------------------------------
Write_block proc near           ; write blocked output (blksize bytes)
        mov     dx,offset output_buffer
        mov     cx,blksize
        mov     bx,output_handle
        mov     ah,40h
        int     21h
        mov     bx,0            ; initialize pointer to blocking buffer
        mov     output_ptr,bx   ; (MOV keeps the carry from DOS intact)
        jc      write_block1    ; DOS signalled an error
        cmp     ax,blksize      ; was correct length written?
        jne     write_block1    ; no, disk must be full
        clc                     ; yes, return CY = 0 indicating all OK
        ret
write_block1:                   ; disk is full, return CY = 1
        stc                     ; as error code
        ret
Write_block endp
;-----------------------------------------------------------------------
; init_buffs -- prime the input buffer and empty the output buffer.
;
; Out:   input_ptr = 0 (set by Read_block), output_ptr = 0, AX = 0
; Uses:  everything Read_block uses
;-----------------------------------------------------------------------
init_buffs proc near
        call    Read_block      ; read 1st block of input
        xor     ax,ax           ; initialize pointer to
        mov     output_ptr,ax   ; output blocking buffer
        ret
init_buffs endp
;-----------------------------------------------------------------------
; flush_buffs -- write whatever is pending in output_buffer to disk.
;
; Out:   CY = 0  buffer was empty, or all output_ptr bytes were written
;        CY = 1  DOS reported an error or wrote fewer bytes
; Uses:  AX, BX, CX, DX, flags.  output_ptr is left unchanged.
;
; The carry from DOS is tested before the byte count, so an error code
; returned in AX cannot be mistaken for a matching count.
;-----------------------------------------------------------------------
flush_buffs proc near           ; write any data in output buffer to disk
        mov     cx,output_ptr
        or      cx,cx
        jz      flush_buffs1    ; jump, buffer is empty
        mov     bx,output_handle
        mov     dx,offset output_buffer
        mov     ah,40h
        int     21h
        jc      flush_buffs2    ; DOS signalled an error
        cmp     ax,output_ptr   ; was write successful?
        jnz     flush_buffs2    ; no, jump
flush_buffs1:
        clc                     ; yes, return CY = 0 for
        ret                     ; success flag.
flush_buffs2:                   ; disk is full so write failed,
        stc                     ; return CY = 1 as error flag
        ret
flush_buffs endp
;-----------------------------------------------------------------------
; sign_on -- print the program banner followed by the input and output
;            file names.
;
; The msg6..msg9 strings are printed with DOS function 9; the two file
; names are zero-terminated and go through Pasciiz instead.
; Uses:  AX, BX, DX, flags
;-----------------------------------------------------------------------
sign_on proc    near            ; print sign-on message
        mov     dx,offset msg6  ; title ...
        mov     ah,9
        int     21h
        mov     dx,offset msg7  ; input file:
        mov     ah,9
        int     21h
        mov     dx,offset input_name
        call    Pasciiz
        mov     dx,offset msg8  ; output file:
        mov     ah,9
        int     21h
        mov     dx,offset output_name
        call    Pasciiz
        mov     dx,offset msg9  ; closing line of the banner
        mov     ah,9
        int     21h
        ret
sign_on endp
;-----------------------------------------------------------------------
; Pasciiz -- print a zero-terminated string with DOS function 2,
;            one character at a time.
;
; In:    DS:DX = address of the string
; Uses:  AX, BX, DL, flags
;
; Characters in the range 'A'..'Z' have bit 5 set before printing, so
; they appear in lower case; every other character is printed as is.
;-----------------------------------------------------------------------
Pasciiz proc    near            ; call with DX = offset of ASCIIZ string
        mov     bx,dx           ; which will be printed on standard output
pasciiz1:
        mov     dl,[bx]
        or      dl,dl           ; terminating zero reached?
        jz      pasciiz9
        cmp     dl,'A'
        jb      pasciiz2        ; below 'A': print unchanged
        cmp     dl,'Z'
        ja      pasciiz2        ; above 'Z': print unchanged
        or      dl,20h          ; fold upper case to lower case
pasciiz2:
        mov     ah,2
        int     21h
        inc     bx
        jmp     pasciiz1
pasciiz9:
        ret
Pasciiz endp
Cseg ends
;-----------------------------------------------------------------------
; Data segment: file-name buffers, DOS handles, buffer pointers,
; messages and the two blksize-byte blocking buffers.
;
; The name buffers are 64 bytes each, matching the 64-byte copy that
; outfile performs, and are zero-filled so a copied name is always
; zero-terminated.  outfile_ext is exactly 5 bytes ('.CLN' plus the
; terminating zero), matching the 5-byte copy in outfile.
;-----------------------------------------------------------------------
Data    segment para public ' data '
input_name      db      64 dup (0)      ; buffer for input filespec
output_name     db      64 dup (0)      ; buffer for output filespec
input_handle    dw      0               ; token returned by PC-DOS
output_handle   dw      0               ; token returned by PC-DOS
input_ptr       dw      0               ; pointer to input blocking buffer
output_ptr      dw      0               ; pointer to output blocking buffer
outfile_ext     db      '.CLN',0        ; extension for filtered file
column          dw      0               ; column count for tab processing

; Messages printed with DOS function 9, which stops at the '$'.
msg1    db      cr,lf
        db      ' cannot find input file. '
        db      cr,lf,' $ '
msg2    db      cr,lf
        db      ' Failed to open output file. '
        db      cr,lf,' $ '
msg3    db      cr,lf
        db      ' File processing completed '
        db      cr,lf,' $ '
msg4    db      cr,lf
        db      ' Missing file name. '
        db      cr,lf,' $ '
msg5    db      cr,lf
        db      ' Disk is full. '
        db      cr,lf,' $ '
msg6    db      cr,lf
        db      ' Clean Word processing File '
        db      cr,lf
        db      ' Copyright (c) 1983 Laboratory Microsystems Inc. '
        db      cr,lf,' $ '
msg7    db      cr,lf,' Input file: $ '
msg8    db      cr,lf,' Output file: $ '
msg9    db      cr,lf,' $ '

input_buffer    db      blksize dup (?) ; buffer for deblocking of data
                                        ; from input file
output_buffer   db      blksize dup (?) ; buffer for blocking of data
                                        ; sent to output file
Data    ends
; Program stack.  The repeat count was missing from the DUP expression,
; so the directive could not assemble.
; NOTE(review): 256 bytes is a chosen size, not one recovered from the
; original listing -- adjust if the intended size is known.
Stack   segment para stack ' stack '
        db      256 dup (?)
Stack   ends
End Clean