program compusta c 10 20 30 40 50 60 70 integer dnum,cic,rec,file,zlist,fyr(20),yeara(20),xrel,stk,dup, * ucode(20),state,county,finc,source(20) character*6 cnum,lcnum character*28 coname,iname,dumnam character*8 smbl character*2 aftnt(70,20),csspin,sdbt,sdbtim,subdbt character*1 cpspin,csspii character*3 cpaper character*10 ein real*4 data(350,20) integer irec,ioff,i,j,fileid,cutoff,ncomps,prev,ncount,create c this file reads the compustat annual ascii file and c writes out a binary DIRECT ACCESS file c files so far are in /usr/compustat and are c pst_annual_current.dat (I got a malformed record error) c mrged_annual_research_current.dat c mrged_annual_research_backdata_1959-1978.dat c fcotc_way_backdata_1950-1969.data open(unit=10, * file='/usr/compustat/pst_ann_wback.ascii', * status='old',form='formatted',recl=8332) open(unit=9,file='/scrtch/compustat/pst_ann_wback.bin', * form='unformatted',access='direct',recl=31253) open(unit=8,file='/scrtch/compustat/pst_ann_wback.idx', * form='formatted',access='sequential') c read header read(10,989) fileid,cutoff,create,ncomps,prev 989 format(442x,i2,i6,i6,4x,i5,i6,7861x) write(*,*) ncomps,fileid,cutoff,create,prev c read records 1 through 4 ncount=0 30 do 10 irec=1,4 c since each record has 5 years, ioff is the year offset ioff=(irec-1)*5+1 read(10,990,end=900) dnum,cnum,cic,rec,file,zlist,dumnam,smbl, * (fyr(i),i=ioff,ioff+4),(yeara(i),i=ioff,ioff+4),xrel, * stk,dup,(ucode(i),i=ioff,ioff+4), * ((aftnt(i,j),i=1,35),j=ioff,ioff+4), * ((data(i,j),i=1,175),j=ioff,ioff+4) c write(*,*) cnum,rec if(cnum.eq.'000000')goto 900 if(irec.eq.1) then coname=dumnam lcnum=cnum endif if(irec.eq.2)iname=dumnam if(lcnum.ne.cnum.or.rec.ne.irec)then write(*,*)'warning will smith! cnum has changed in', * rec,irec,lcnum,cnum goto 900 endif 990 format(i4,a6,i3,i1,i2,i2,a28,a8,10i2,i4,i1,i2,6x,5i1,175a2, * 5(13f10.3,f8.3,f10.3,f8.3,5f10.3,3f8.3,f10.3,f8.3,f10.6, * f10.3,f8.3,3f10.3,f8.3,f10.3,f8.3,2f10.3,2f8.3,6f10.3, * f8.3,3f10.3,3f8.3,f10.4,6f8.3,3f10.3,3f8.3,f10.3,f8.3, * 3f10.3,f8.3,f10.3,2f8.3,f10.3,6f8.3,3f10.3,2f8.3,f10.3, * 4f8.3,f10.3,4f8.3,4f10.3,8f8.3,2f10.3,f8.3,7f10.3,f8.3, * 6f10.3,5f8.3,3f10.3,f8.3,5f10.3,f8.3,4f10.3,2f8.3,f10.3, * 3f8.3,f10.3,11f8.3,f10.3,6f8.3,f10.3,f8.3,f10.3,2f8.3, * f10.3)) c note that the data array for 1-4 and 5-8 are same format! thank God. c for aftnt and data the items cycle fastest, then years, first data is oldest c record 1,3 coname=dumnam (just set on record 1) c record 2,4 iname=dumnam (just set on record 2) 10 continue c records 5 through 8 do 20 irec=5,8 ioff=(irec-5)*5+1 read(10,991) dnum,cnum,cic,rec,file,state,county,finc, * (source(i),i=ioff,ioff+4), * cpspin,csspin,csspii,sdbt,sdbtim,subdbt,cpaper,ein, * ((aftnt(i,j),i=36,70),j=ioff,ioff+4), * ((data(i,j),i=176,350),j=ioff,ioff+4) 991 format(i4,a6,i3,i1,i2,i2,i3,i2,5i2,a1,a2,a1,a2,a2,a2,a3, * 6x,a10,30x,175a2, * 5(13f10.3,f8.3,f10.3,f8.3,5f10.3,3f8.3,f10.3,f8.3,f10.6, * f10.3,f8.3,3f10.3,f8.3,f10.3,f8.3,2f10.3,2f8.3,6f10.3, * f8.3,3f10.3,3f8.3,f10.4,6f8.3,3f10.3,3f8.3,f10.3,f8.3, * 3f10.3,f8.3,f10.3,2f8.3,f10.3,6f8.3,3f10.3,2f8.3,f10.3, * 4f8.3,f10.3,4f8.3,4f10.3,8f8.3,2f10.3,f8.3,7f10.3,f8.3, * 6f10.3,5f8.3,3f10.3,f8.3,5f10.3,f8.3,4f10.3,2f8.3,f10.3, * 3f8.3,f10.3,11f8.3,f10.3,6f8.3,f10.3,f8.3,f10.3,2f8.3, * f10.3)) c count up companies if(rec.eq.8) ncount=ncount+1 if(lcnum.ne.cnum.or.rec.ne.irec)then write(*,*)'warning will smith! cnum has changed in', * rec,irec,lcnum,cnum goto 900 endif 20 continue c write out binary direct write(9,rec=ncount)dnum,cnum,cic,file,zlist,coname,iname,smbl, * fyr,yeara,xrel,stk,dup,ucode,state,county,finc, * source,cpspin,csspin,csspii,sdbt,sdbtim,subdbt, * cpaper,ein,aftnt,data c write out index c note this needs to be sorted on cnum using unix sort write(8,'(a6,1x,i5)')cnum,ncount goto 30 900 write(*,*) 'ncount=',ncount,'ncomps=',ncomps stop end