Pages

Wednesday, July 20, 2011

EDI File Parser

A utility to quickly parse and validate EDI file. Support for direct file path or EC Run Control.
This example uses delivered VCHR definition.

Note: assumed some field positions here, best is to read Map for field position

#!/bin/ksh93

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
#     EDI Pattern Matching script - parse / check EDI files
#
#     Env: Redhat Linux/ksh
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~         
scr=$(basename $0)
sqf=/tmp/"$scr"_$$.sql              # temp sql file
spool=/tmp/"$scr"_$$.lst            # sqlplus spool file

# cleanup at process end
trap "rm $sqf 2>/dev/null;rm $spool 2>/dev/null; rm $0.$$ 2>/dev/null;" 0
trap "echo Process Cancelled; exit 1" 1 2 3 15

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                 
#                                                                                   
# execute a SQL statement - output stored in $rc & spool file
#                                                                             
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                 
execSQL() {               

  echo "set pagesize 0 feedback off echo off heading off verify off linesize 200;" >$sqf
  echo "define SPOOL_FILE='$spool'"           >>$sqf 
  echo "WHENEVER SQLERROR EXIT SQL.SQLCODE"   >>$sqf 
  echo "WHENEVER OSERROR EXIT 1"              >>$sqf
  echo "spool &SPOOL_FILE"                    >>$sqf
  echo "$sql"                                 >>$sqf
  echo "/"                                    >>$sqf
  echo "spool off"                            >>$sqf
  echo "quit"                                 >>$sqf

  $ORACLE_HOME/bin/sqlplus -s $usr/$pwd @$sqf > /dev/null

  if [ $? -ne 0 ]; then
     # error running sql

     echo 'Error running SQL statement:'$sql

     if [ -f $spool ]; then
        cat $spool
     fi   

     exit 1

  fi

  return 0

}

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                   
#                                                                                                 
# parseEDI - parse EDI file
#                                             
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                   
function parseEDI
{

if [ $# -ne 1 ]; then
   echo 'Error: parseEDI() input file missing'
   return 1
fi

input_file=$1

awk -v dict="$spool" -v vbose=$vbose -v debug=$debug -v hdr=$hdr '

function ltrim(s) { sub(/^[ \t]+/, "", s); return s }
function rtrim(s) { sub(/[ \t]+$/, "", s); return s }
function trim(s)  { return rtrim(ltrim(s)); }

BEGIN {
i1=0;
i2=0;
i3=0;
i4=0;
i5=0;
c1=0;
c2=0;
c3=0;
c4=0;
c5=0;

while ((getline < dict ) > 0)
  {
   if ( match(substr($1,1,3),"000"))
      {
        vh_no[i1] = $2;
        vh_name[i1] = $3;
        vh_beg[i1] = $4;
        vh_len[i1++] = $5;
      }
   else
   if ( match(substr($1,1,3),"001"))
      {
        vl_no[i2] = $2;
        vl_name[i2] = $3;
        vl_beg[i2] = $4;
        vl_len[i2++] = $5;
      }
   else
   if ( match(substr($1,1,3),"002"))
      {
        dl_no[i3] = $2;
        dl_name[i3] = $3;
        dl_beg[i3] = $4;
        dl_len[i3++] = $5;
      }
   else
   if ( match(substr($1,1,3),"003"))
      {
        xl_no[i4] = $2;
        xl_name[i4] = $3;
        xl_beg[i4] = $4;
        xl_len[i4++] = $5;
      }
   else
   if ( match(substr($1,1,3),"00V"))
      {
        vn_no[i5] = $2;
        vn_name[i5] = $3;
        vn_beg[i5] = $4;
        vn_len[i5++] = $5;
      };
  }
close(dict)

}

{
if ( match(substr($1,1,3),"000"))
   {
    print "==============================================================================#"NR" (hdr)"; 
    if (vbose==1)
       for (idx=0; idx< i1;idx++ ) printf("%-20s(%04d-%04d)=%s.\n", vh_name[idx],vh_beg[idx],vh_beg[idx]+vh_len[idx]-1,substr($0,vh_beg[idx],vh_len[idx]))
    else
       if (debug==1)
          for (idx=0; idx< i1;idx++ ) {if (trim(substr($0,vh_beg[idx],vh_len[idx]))!="") printf("%d:%-20s(%04d-%04d)=%s.\n",NR,vh_name[idx], vh_beg[idx],vh_beg[idx]+vh_len[idx]-1, substr($0,vh_beg[idx],vh_len[idx]))}
       else
          for (idx=0; idx< i1;idx++ ) if (trim(substr($0,vh_beg[idx],vh_len[idx]))!="") printf("%-20s=%s.\n",vh_name[idx], substr($0,vh_beg[idx],vh_len[idx]));
    c1++;
   }
else
if ( hdr==0 && match(substr($1,1,3),"001"))
   {
    print "------------------------------------------------------------------------------#"NR" (line)";
    if (vbose==1)
       for (idx=0; idx< i2;idx++ ) printf("%-20s(%04d-%04d)=%s.\n",vl_name[idx],vl_beg[idx],vl_beg[idx]+vl_len[idx]-1,substr($0,vl_beg[idx],vl_len[idx]))
    else
       if (debug==1)
          for (idx=0; idx< i2;idx++ ) {if (trim(substr($0,vl_beg[idx],vl_len[idx]))!="") printf("%d:%-20s(%04d-%04d)=%s.\n",NR,vl_name[idx],vl_beg[idx],vl_beg[idx]+vl_len[idx]-1, substr($0,vl_beg[idx],vl_len[idx]))}
       else
          for (idx=0; idx< i2;idx++ ) if (trim(substr($0,vl_beg[idx],vl_len[idx]))!="") printf("%-20s=%s.\n",vl_name[idx],substr($0,vl_beg[idx],vl_len[idx]));
    c2++;
   }
else
if ( hdr==0 && match(substr($1,1,3),"002"))
   {
    print "------------------------------------------------------------------------------#"NR" (dist)";
    if (vbose==1)
       for (idx=0; idx< i3;idx++ ) printf("%-20s(%04d-%04d)=%s.\n",dl_name[idx],dl_beg[idx],dl_beg[idx]+dl_len[idx]-1,substr($0,dl_beg[idx],dl_len[idx]))
    else
       if (debug==1)
          for (idx=0; idx< i3;idx++ ) {if (trim(substr($0,dl_beg[idx],dl_len[idx]))!="") printf("%d:%-20s(%04d-%04d)=%s.\n",NR,dl_name[idx],dl_beg[idx],dl_beg[idx]+dl_len[idx]-1, substr($0,dl_beg[idx],dl_len[idx]))}
       else
          for (idx=0; idx< i3;idx++ ) if (trim(substr($0,dl_beg[idx],dl_len[idx]))!="") printf("%-20s=%s.\n",dl_name[idx],substr($0,dl_beg[idx],dl_len[idx]));
    c3++;
   }
else
if ( hdr==0 && match(substr($1,1,3),"003"))
   {
    print "------------------------------------------------------------------------------#"NR" (dlt)";
    if (vbose==1)
       for (idx=0; idx< i4;idx++ ) printf("%-20s(%04d-%04d)=%s.\n",xl_name[idx],xl_beg[idx],xl_beg[idx]+xl_len[idx]-1,substr($0,xl_beg[idx],xl_len[idx]))
    else
       if (debug==1)
          for (idx=0; idx< i4;idx++ ){if (trim(substr($0,xl_beg[idx],xl_len[idx]))!="") printf("%d:%-20s(%04d-%04d)=%s.\n",NR, xl_name[idx],xl_beg[idx],xl_beg[idx]+xl_len[idx]-1,substr($0,xl_beg[idx],xl_len[idx]))}
       else
          for (idx=0; idx< i4;idx++ ) if (trim(substr($0,xl_beg[idx],xl_len[idx]))!="") printf("%-20s=%s.\n",xl_name[idx],substr($0,xl_beg[idx],xl_len[idx]));
    c4++;
   }
if ( hdr==0 && match(substr($1,1,3),"00V"))
   {
    print "------------------------------------------------------------------------------#"NR" (vndr)";
    if (vbose==1)
       for (idx=0; idx< i5;idx++ ) printf("%-20s(%04d-%04d)=%s.\n",vn_name[idx],vn_beg[idx],vn_beg[idx]+vn_len[idx]-1,substr($0,vn_beg[idx],vn_len[idx]))
    else
       if (debug==1)
          for (idx=0; idx< i5;idx++ ){if (trim(substr($0,vn_beg[idx],vn_len[idx]))!="") printf("%d:%-20s(%04d-%04d)=%s.\n",NR, vn_name[idx],vn_beg[idx],vn_beg[idx]+vn_len[idx]-1,substr($0,vn_beg[idx],vn_len[idx]))}
       else
          for (idx=0; idx< i5;idx++ ) if (trim(substr($0,vn_beg[idx],vn_len[idx]))!="") printf("%-20s=%s.\n",vn_name[idx],substr($0,vn_beg[idx],vn_len[idx]));
    c5++;
   };


}

END{
    print "==============================================================================";
    print "Hdr=",c1;
    print "Line=",c2;
    print "Dist=",c3;
    if (c5>0) print "Vndr=",c4;
    if (c4>0) print "Xdlt=",c4;
    print "total=" c1+c2+c3+c4+c5;
}

' $input_file

}

function chkEDI
{
   if [ $# -ne 1 ]; then
  echo "Error: $0 missing EDI file name"
  return 1
fi

   chk_file=$1
   echo "File: $chk_file (EC Map='$map_id)"
       
   # check non-Ascii
   echo "$indent Checking Non-Ascii..."

   grep  -qP "[\x80-\xFF]" $chk_file

   if [ $? -eq 0 ]; then

      # store out AWK results in var
      read cnt1 <<< $(grep  -nP "[\x80-\xFF]" $chk_file | awk -F\: '{printf "#%d.", $1} END{print x" "}'| tee /dev/tty)

      echo "$indent $indent Error non-ascii found on above line(s). Total: "$(echo $cnt1 | grep -o \#  |wc -l)
      return 1
   fi

   # check ROWID 000-003
   echo "$indent Checking ROWID..."

   # get valid EC ID from map spool & create expression for grep command

   cut -c1-3  $chk_file | grep -v $(cut -c1-3 $spool|sort|uniq|sed 's/^/-e /' | tr '\n' ' ') | sort |uniq -c > $0.$$

   if [ -s $0.$$ ]; then

      (( cnt1 = 0));

      while read cnt lvl; do
            echo -n "$indent $lvl($cnt): "
            grep -n ^"$lvl" $chk_file | awk -F\: '{printf "#%d.", $1} END{print " "}'
            (( cnt1 = cnt1 + 1 ))
      done < $0.$$

      echo "$indent $indent Error: invalid ROWID found on above line(s). Total Count: $cnt1"
      return 1
   fi

   # check dups
   echo "$indent Checking Dups (vid/vln/dln)..."

   # check dup vid
   grep ^000  $chk_file | cut -c9-16 | sort | uniq -c | grep -v "^ \{5\} 1 " > $0.$$

   if [ -s $0.$$ ]; then

      (( cnt1 = 0));

      while read cnt vid; do
         echo -n "$indent $vid($cnt): "
         grep -n ^000.....$vid $chk_file | awk -F\: -v vid=$vid '{printf "#%d.", $1} END{print " "}'
         (( cnt1 = cnt1 + 1 ))
      done < $0.$$

      echo "$indent Error: duplicate VID(s) found on above line(s). Total Count: $cnt1"
      return 1
     
   fi

   # check dup vchr ln
   grep ^001  $chk_file | cut -c9-16,17-21 --output-delimiter=" " | sort | uniq -c | grep -v "^ \{5\} 1 " > $0.$$

   if [ -s $0.$$ ]; then

      (( cnt1 = 0));

      while read cnt vid vln; do
         echo -n "$indent $indent $vid $vln ($cnt): "
         grep -n ^001....."$vid$vln" $chk_file | awk -F\: -v vid=$vid -v vln=$vln '{printf "#%d.", $1} END{print " "}'
         (( cnt1 = cnt1 + 1 ))
      done < $0.$$

      echo "$indent Error: duplicate Vchr Line(s) found on above line(s). Total Count: $cnt1"
      return 1
   fi

   # check dup distrib ln
   grep ^002  $chk_file | cut -c9-16,17-21,22-26 --output-delimiter=" " | sort | uniq -c | grep -v "^ \{5\} 1 " > $0.$$

   if [ -s $0.$$ ]; then

      (( cnt1 = 0));

      while read cnt vid vln dst; do
         echo -n "$indent $indent $vid $vln $dst ($cnt): "
         grep -n ^002....."$vid$vln$dst" $chk_file | awk -F\: -v vid=$vid -v vln=$vln '{printf "#%d.", $1} END{print " "}'
         (( cnt1 = cnt1 + 1 ))
      done < $0.$$

      echo "$indent Error: duplicate Distrib Line(s) found on above line(s). Total Count: $cnt1"
      return 1
   fi


   #check dates
   echo "$indent Checking dates..."
                 
   parseEDI $chk_file  | grep -e '_DATE ' -e '_DT ' | tr '[:=.]' ' ' >$0.$$

   dateErr=0
   (( cnt1 = 0)); 

   while read recno fld pos dt; do
 
      bad=1

      if [[ $dt = 20{2}([0-9])/[0-1][0-9]/[0-3][0-9] ]] && [[ $(date -d "$dt" >/dev/null 2>&1; echo $?) -eq 0 ]]; then
         bad=0
      fi
 
      if [ $bad -eq 1 ]; then
          echo "$indent $indent #"$recno" "$fld" "$pos" "$dt
          dateErr=1
         (( cnt1 = cnt1 + 1 ))
      fi
 
   done < $0.$$
 
   if [ $dateErr -eq 1 ]; then
      echo "$indent $indent Error: invalid dates found on above line(s). Count: $cnt1"
      return 1
   fi

   #check EOL
   echo "$indent Checking EOL..."
 
   cnt1=$(tail -1 $chk_file | wc -l)


   if [ $cnt1 -ne 1 ]; then
      echo "$indent $indent Error: EOL missing"
      return 1
   fi
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                   
#                                                                                                 
# usage - display usage info
#                                             
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~                   
function usage {

cat <<EOF
EDI file parser
Usage: $scr [-r run_control ] [-u user ] [-v] edi_file
   -a: check combined file
   -d: check file
   -h: header only
   -m: Map ID
   -r: PS Run Control
   -u: PS User
   -v: verbose mode
EOF

exit 1
}

####################################################################
map_id=''
typeset -i vbose=0
typeset -u user='BATCH'
typeset -i debug=0
typeset -i hdr=0;
typeset -i all_files=0
runcntl=""
indent="   ";

while getopts :dhm:r:u:v args
do                   
     case $args in                                       

     d)      debug=1
             ;;             

     h)      hdr=1
             ;;

     m)      map_id="$OPTARG"
             ;;                           

     r)      runcntl="$OPTARG"
             ;;                           

     u)      user="$OPTARG"
             ;;                           

     v)      vbose="1"
             ;;               

     ?)     echo 'Invalid option: '$OPTARG
            usage
            ;;                                     

     esac             
done                                                     

shift $(($OPTIND - 1))   

# must specify edi file name or a run control
if [ ${runcntl:=x} = "x" ] && [ $# = 0 ]; then
   usage
fi

# get user id & pw
usr=$(ora_user)
pwd=$(ora_pwd)

edi_file=$1

if [ ${runcntl:=x} != "x" ]; then
   # get file path & map id 
   sql="select trim(ECONEFILEPATH)||trim(ECONEFILENAME)||','||trim(ECFORCETRANS) from ps_ECRUNCNTL where oprid='"$user"' and RUN_CNTL_ID='"$runcntl"'"
   execSQL

   if [ ! -s $spool ]; then
      echo 'Cannot find EC Run Control for PS User '$user' Run Control '$runcntl
      exit 1
   fi

   edi_file=`awk -F, '{print $1}' $spool`

   if [ ! -s $edi_file ]; then
      echo 'EDI file '$edi_file' not found'
      exit 1
   fi
 
   sql="select trim(ECMAPID) from ps_ECMAPDEFN where ECTRANSID=trim('"`awk -F, '{print $2}' $spool`"')"
   execSQL

   map_id=`awk '{print $1}' $spool`

else
   if [ ! -f $edi_file ]; then
      echo 'EDI file '$edi_file' not found'
      exit 1
   fi
fi

if [ ${map_id:-x} = 'x' ]; then
 
   tag=`head -1 $edi_file | cut -c1-3`
 
   if [ $tag = '999' ]; then
 
      sql="select trim(ECMAPID) from ps_ECMAPDEFN where ECTRANSID=trim('"`head -1 $edi_file | cut -c4-18`"')"
      execSQL
 
      map_id=`awk '{print $1}' $spool`
     
   else
      if [ $tag = '000' ]; then
     
         # vchr type offset & length
         sql="select ECSTARTPOS||'-'|| (ECSTARTPOS+ LENGTH-1) from ps_ECINMAPFLD where ECMAPID = 'AVCHRDEFN' and ECFILEROWID='000' and FIELDNAME='TX_VOUCHER_TYPE'"
         execSQL
 
         vt_pos=$(cat $spool)

         vchr_type=`head -1 $edi_file | cut -c$vt_pos`

         if [ "$vchr_type" = 'SPV ' ]; then     
            map_id='ASPVDEFN'
         else
            if [ "$vchr_type" = 'SVV ' -o "$vchr_type" = 'MVV ' ] ; then     
               map_id='AVCHRDEFN'
            fi
         fi
      fi
   fi
fi

if [ ${map_id:-x} = 'x' ]; then
   echo 'Cannot get EC Map ID'
   exit 1
fi

if [ ${runcntl:=x} != "x" ]; then
    echo 'EC Map='$map_id' (file='$edi_file')'
#else
    #echo "EDI file: $edi_file (EC Map='$map_id)"
fi

# get EC Map in spool file
#sql="select (select ECROWIDVALUE from ps_ECINMAPFLD where ECMAPID=m1.ECMAPID and  ECFILEROWID=m1.ECFILEROWID and seqno=1) ECFILEROWID ,seqno,FIELDNAME, ECSTARTPOS,length,ECSTARTPOS+length-1 from ps_ECINMAPFLD m1 where ECMAPID='"11$map_id"' order by 1,2"
sql="select ECFILEROWID ,seqno,FIELDNAME, ECSTARTPOS,length,ECSTARTPOS+length-1 from ps_ECINMAPFLD m1 where ECMAPID='"$map_id"' order by 1,2"
execSQL

# parse or check option
if [ $debug -eq 1 ]; then

   #disable hdr mode - check all levels
   hdr=0;
   list="";
   err=0;
 
   # check combined file or individual file?
 
   if [ $2'x' != 'x' ]; then
      list="$2*"
     
      for file in $list; do
      chkEDI $file
     
      if [ $? -ne 0 ]; then
            err=1     
      fi
      done

      if [ $err -eq 0 ]; then

      echo 'File: '$edi_file' (combined)'
      echo "$indent Checking dups ..."

         # get vchr id count
         grep ^000 $edi_file | cut -c9-16 | sort | uniq -c | grep -v "^ \{5\} 1 " > $0.$$
       
         if [ -s $0.$$ ]; then
            (( cnt1 = 0));
       
            while read cnt vid; do
               echo -n "$indent "$vid": "

               # find vid in original file
               grep -n ^000.....$vid $list | awk -F":" '{if (NR >1) printf ", "; printf "#"$2" ("$1")"}'
               echo " "

               (( cnt1 = cnt1 + 1 ))
           
            done < $0.$$
       
            echo "$indent Error: duplicate VID(s) found on above line(s). Count: $cnt1"
        fi
      fi
   else
      chkEDI $edi_file     
   fi
else
   echo EC Map=$map_id
   parseEDI $edi_file
fi