'/* XDUPN.BAS Extract duplicate lines from a sorted text file */
'/*           By: Dale Thorn                                  */
'/*           Rev. 04.10.2002                                 */

'$include: 'basdef.h'
'$include: 'filekill.h'
'$include: 'fileopen.h'
'$include: 'longname.h'
'$include: 'messages.h'
'$include: 'midchar.h'
'$include: 'string.h'
'$include: 'parmstr1.h'

declare function ifn.wdup(ctmp2, itot, itmp1, itmp2)

'$include: 'basdef.bas'
'$include: 'filekill.bas'
'$include: 'fileopen.bas'
'$include: 'longname.bas'
'$include: 'messages.bas'
'$include: 'midchar.bas'
'$include: 'parmstr1.bas'
'$include: 'string.bas'
'$include: 'scrnparm.bas'

'---- Command-line handling ---------------------------------------------------
'Expected form:  XDUPN  filename  offset  [comparelen]
'The whole command tail is uppercased before it is parsed.  Every ifn.msgs
'call below passes 1 as its last argument; per the original notes that
'displays the message and exits the program.
ccmd = ucase$(rtrim$(command$))
if len(ccmd) = 0 then                   'nothing typed after the program name
   i = ifn.msgs("Usage:  XDUPN  filename  offset  comparelen", 5, 24, 79, 0, 1)
end if

'parmstr1 splits the command tail into the file spec (cfil), its name and
'extension parts (cnam, cext) and the remaining parameters in cprm().
'The returned iprm is used below as the index of the last entry in cprm():
'0 = offset only, 1 = offset and compare length.
iprm = parmstr1(ccmd, cfil, cnam, cext, cprm())
if not (iprm >= 0 and iprm <= 1) then   'need one or two parameters
   i = ifn.msgs("Invalid number of parameters", 5, 24, 79, 1, 1)
end if

'Accept only an 8.3 style name: 1-8 char name, 0-3 char extension, no extra dot
if len(cnam) < 1 or len(cnam) > 8 or len(cext) > 3 or instr(cext, ".") > 0 then
   i = ifn.msgs("Invalid filename", 5, 24, 79, 1, 1)
end if

'The user supplies a zero-based offset; add 1 because mid$ positions start at 1
ioffset = 1 + pdqvali(cprm(0))
if ioffset <= 0 then                    'a negative offset was entered
   i = ifn.msgs("Invalid compare offset", 5, 24, 79, 1, 1)
end if

'Compare length: 999 unless a second parameter was supplied
icmplen = 999
if iprm = 1 then
   icmplen = pdqvali(cprm(1))
   if icmplen < 1 then                  'zero, negative or non-numeric length
      i = ifn.msgs("Invalid comparelen", 5, 24, 79, 1, 1)
   end if
end if

'---- Open the source file and choose the output file names -------------------
'ifn.open hands back a length/status value in llof.  Per the original notes:
'  llof < 0  the file spec contained wildcards
'  llof = 0  the file does not exist or is empty
i = ifn.open(1, cfil, "S", llof)       'open the source file in sequential mode
if llof < 0 then                                'user input a wildcard filespec
   i = ifn.msgs("Invalid filename", 5, 24, 79, 1, 1)             'beep and exit
elseif llof = 0 then                          'source file nonexistent or empty
   i = ifn.kill(1, cfil)                             'kill the zero-length file
   i = ifn.msgs(cfil + " not found", 5, 24, 79, 1, 1)            'beep and exit
end if                              'display the error message [above] and exit

'The destination is <name>.out unless the source itself has that extension,
'in which case <name>.tmp is used so the source is not overwritten.
'The command line is uppercased before it is parsed, so the extension is
'compared without regard to case; a lowercase-only test could never match
'an uppercased extension.
if ucase$(cext) = "OUT" then                    'source file extension == "OUT"
   cdst = cnam + ".tmp"                       'set dest.file extension to "TMP"
else                                            'source file extension != "OUT"
   cdst = cnam + ".out"                       'set dest.file extension to "OUT"
end if
ctmp1 = "dtmp1.lol"           'work file: all duplicate groups, count-prefixed
ctmp2 = "dtmp2.lol"           'work file: the duplicate group being collected

i = ifn.msgs("Please standby", 5, 24, 79, 0, 0)               'OK to proceed...

'---- Pass 1: collect runs of duplicate lines ---------------------------------
'Reads the (already sorted) source on file #1.  Consecutive lines whose
'uppercased compare segment is equal form a group.  Each group is written to
'the scratch file ctmp2 (file #3); when the group ends, ifn.wdup copies it to
'ctmp1 (file #2) with the group's line count prefixed to every line.
'Also tracks the longest source line and the widest count for pass 2.
open ctmp1 for output as 2                        'open the temp output bigfile

init = 0                             'flag: current group already started on #3
igotprev = 0                         'flag: a previous line exists to compare
itot = 0                             'lines in the current duplicate group
csav = ""                            'full text of the previous distinct line
cdup = ""                            'compare segment of the previous line
imaxnumlen = 0                       'widest duplicate count seen, in digits
imaxlinlen = 0                       'longest source line seen (right-trimmed)
while not eof(1)                                 'loop until source file EOF
   line input #1, clin                         'get a line from the source file
   ilinlen = len(rtrim$(clin))                 'current source text line length
   if imaxlinlen < ilinlen then                 'max. linelen < current linelen
      imaxlinlen = ilinlen                     'set max.linelen = curr. linelen
   end if
   ctst = ucase$(mid$(clin, ioffset, icmplen))   'get uppercased target segment
   'The very first line has nothing to be a duplicate of.  Without the
   'igotprev test, a first line whose compare segment is empty (blank or
   'shorter than the offset) would equal the initial empty cdup and be
   'written out as a duplicate of a nonexistent line.
   if igotprev and (ctst = cdup) then        'current line == the previous line
      if not init then                         'a duplicate line was NOT output
         close 3                                'close the temp duplicates file
         open ctmp2 for output as 3            'open a new temp duplicates file
         print #3, csav                        'output the first duplicate line
         init = not 0                           'set duplicates compare flag ON
         itot = 1                             'initialize curr.total duplicates
      end if
      print #3, clin                         'output the current duplicate line
      itot = itot + 1                         'increment curr. total duplicates
   else                                      'current line != the previous line
      inumlen = len(ltrim$(str$(itot)))       'current total dupl.number length
      if imaxnumlen < inumlen then            'max.number len < curr.number len
         imaxnumlen = inumlen                  'set max. numlen to curr. numlen
      end if
      i = ifn.wdup(ctmp2, itot, 2, 3)          'flush finished group (if any)
      csav = clin                               'save line from the source file
      cdup = ctst                               'save its compare segment
      init = 0                                      'set dupl. compare flag OFF
      itot = 0                                      'zero curr.total duplicates
      igotprev = not 0                          'a previous line now exists
   end if
wend
'Flush the group that was still open when the source ran out
inumlen = len(ltrim$(str$(itot)))             'current total dupl.number length
if imaxnumlen < inumlen then                  'max.number len < curr.number len
   imaxnumlen = inumlen                        'set max. numlen to curr. numlen
end if
i = ifn.wdup(ctmp2, itot, 2, 3)                'write duplicates to "perm" file
close                                         'close all open files before sort
'Sort the count-prefixed file with the external srtf utility.  Each line now
'begins with a 5-char count and a space, so the compare segment has moved 6
'columns to the right.  Pass 2 reads the result from "dtmp1.out".
'NOTE(review): srtf's argument conventions are not visible in this file;
'confirm that ioffset + 6 matches the offset base srtf expects.
shell "srtf " + ctmp1 + " 0 5 " + str$(ioffset + 6) + _
      " " + str$(icmplen) + "/no/r"         'sort file by dupl.count+comparelen

'---- Pass 2: format the sorted duplicate groups into the final file ----------
'Every line of "dtmp1.out" carries a 5-char right-justified count, one space,
'then the original text.  Groups are detected again by comparing the
'uppercased compare segment of consecutive lines, and a dashed separator line
'is written between groups (not before the first one).
open "dtmp1.out" for input as 1                   'open the temp output bigfile
open cdst for output as 2                           'open the final output file
ioffset = ioffset + 6                  'skip the 6-column count prefix per above

init = 0                             'flag: first line of this group was output
isep = 0                             'flag: at least one group already started
igotprev = 0                         'flag: a previous line exists to compare
csav = ""                            'full text of the previous distinct line
cdup = ""                            'compare segment of the previous line
'Drop the unused leading blanks of the 5-wide count so the count column is
'only as wide as the largest count found in pass 1.
inumoffset = 5 - imaxnumlen                     'offset for writing final lines
while not eof(1)                                 'loop until sorted file EOF
   line input #1, clin                         'get a line from the sorted file
   ctst = ucase$(mid$(clin, ioffset, icmplen))   'get uppercased target segment
   'The first line read has no predecessor.  Without the igotprev test, a
   'first group whose compare segment is empty would match the initial empty
   'cdup and put a spurious blank line at the top of the output.
   if igotprev and (ctst = cdup) then        'current line == the previous line
      if not init then                         'a duplicate line was NOT output
         if not isep then                      'separator line flag NOT yet set
            isep = not 0                      'just set the separator line flag
         else                                 'separator line flag has been set
            print #2, string$(imaxlinlen + imaxnumlen + 1, "-")'print separator
         end if
         print #2, mid$(csav, inumoffset + 1)      'output first duplicate line
         init = not 0                                  'set the compare flag ON
      end if
      print #2, mid$(clin, inumoffset + 1)   'output the current duplicate line
   else                                      'current line != the previous line
      csav = clin                               'save line from the sorted file
      cdup = ctst                               'save its compare segment
      init = 0                                        'set the compare flag OFF
      igotprev = not 0                          'a previous line now exists
   end if
wend

'---- Cleanup: remove the work files, show the result, and exit ---------------
close                                    'release every open file number first
kill ctmp1                               'count-prefixed duplicates work file
kill ctmp2                               'per-group scratch file
kill "dtmp1.out"                         'sorted copy read by pass 2
cbrow = "brow " + cdst                   'command line for the external viewer
shell cbrow                              'browse the destination output file
system                                   'return control to operating system

function ifn.wdup(ctmp2, itot, itmp1, itmp2)       'write temp. data to bigfile
   'Copies every line of the scratch file named by ctmp2 to the already-open
   'file number itmp1, prefixing each line with the count itot right-justified
   'in a 5-character field followed by one space.
   '  ctmp2  name of the scratch file holding the current duplicate group
   '  itot   number of lines in the group; zero means there is nothing to copy
   '  itmp1  file number of the output file (must already be open for output)
   '  itmp2  file number used for the scratch file; it is closed, reopened
   '         for input on ctmp2, and left open on return
   'No return value is assigned.
   if itot = 0 then exit function                'no duplicates were collected

   cnum = space$(5)                              '5-char field for the count
   rset cnum = right$(str$(itot), 5)             'right-justify the count in it
   close itmp2                                   'finish writing the scratch file
   open ctmp2 for input as itmp2                 'reopen it for reading
   do until eof(itmp2)                           'copy until scratch file EOF
      line input #itmp2, crow                    'next line of the group
      print #itmp1, cnum; " "; crow              'count, space, original text
   loop
end function
