unit RBScan;
{$MODE OBJFPC}{$H+}
interface
uses
  SysUtils, RBOwn;

type
  EScanError = class(Exception)
  public
    Col, Row: Integer;
    constructor New(ARow, ACol: Integer; AMsg: string);
  end;

  { TTokenType }
  TTokenType = (
    ttInvalid,
    ttEndOfLine,

    ttInteger,
    ttReal,
    ttString,
    ttIdentifier,

    ttKnownWordsBegin,
    ttSub,
    ttFunction,
    ttEnd,
    ttGlobal,
    ttDim,
    ttReDim,
    ttWhile,
    ttWend,
    ttFor,
    ttTo,
    ttStep,
    ttNext,
    ttDo,
    ttLoop,
    ttIf,
    ttThen,
    ttElse,
    ttAnd,
    ttOr,
    ttXOr,
    ttMod,
    ttEqv,
    ttNot,

    ttKnownSymbolsBegin,
    ttComma,
    ttLeftParen,
    ttRightParen,
    ttPeriod,
    ttLeftBracket,
    ttRightBracket,
    ttEqual,
    ttMinus,
    ttPlus,
    ttStar,
    ttSlash,
    ttBackSlash,
    ttCaret,
    ttSemicolon,
    ttLess,
    ttGreat,
    ttLessEqual,
    ttGreatEqual,
    ttInequal
  );

  { TToken }
  TToken = object
    TokenType: TTokenType;
    StrValue: string;
    Original: string;
    IntValue: Int64;
    RealValue: Double;
    Position: Integer;
    function ToString: string;
  end;

  { TScanner }
  TScanner = class(TOwnable)
  private
    FCode: string;
    FToken, FPeek: TToken;
    PeekValid: Boolean;
    Head, Len: Integer;
    procedure RaiseError(AHead: Integer; const AMessage: string);
    procedure SetCode(const ACode: string);
    procedure NewToken;
  public
    constructor Create(AOwner: TOwnable); override;
    procedure Reset;
    function HasMore: Boolean;
    function NextChar: Char;
    procedure Skip(Count: Integer=1);
    procedure SkipSpaces;
    function AtEndOfLine: Boolean;
    function Scan: Boolean;
    function Peek: TToken;
    property Code: string read FCode write SetCode;
    property Token: TToken read FToken;
  end;

const
  TokenName: array [TTokenType] of string = (
    '<invalid>',
    '<end-of-line>',

    '<integer>',
    '<real>',
    '<string>',
    '<identifier>',

    '',
    'Sub',
    'Function',
    'End',
    'Global',
    'Dim',
    'ReDim',
    'While',
    'Wend',
    'For',
    'To',
    'Step',
    'Next',
    'Do',
    'Loop',
    'If',
    'Then',
    'Else',
    'And',
    'Or',
    'XOr',
    'Mod',
    'Eqv',
    'Not',

    '',
    ',',
    '(',
    ')',
    '.',
    '[',
    ']',
    '=',
    '-',
    '+',
    '*',
    '/',
    '\',
    '^',
    ';',
    '<',
    '>',
    '<=',
    '>=',
    '<>'
  );

var
  UpperCaseTokenName: array [TTokenType] of string;

implementation

uses
  RBError, RBUtil;

var
  FS: TFormatSettings;

procedure InitializeUpperCaseTokenNames;
var
  TT: TTokenType;
begin
  for TT:=Low(UpperCaseTokenName) to High(UpperCaseTokenName) do
    UpperCaseTokenName[TT]:=UpperCase(TokenName[TT]);
end;

{ EScanError }
constructor EScanError.New(ARow, ACol: Integer; AMsg: string);
begin
  Col:=ACol;
  Row:=ARow;
  inherited Create(AMsg);
end;

{ TToken }
function TToken.ToString: string;
begin
  FS.DecimalSeparator:='.';
  case TokenType of
    ttInteger: Result:=IntToStr(IntValue);
    ttReal: Result:=FloatStr(RealValue);
    ttString: Result:='"' + StringReplace(StrValue, '"', '""', [rfReplaceAll]) + '"';
    ttIdentifier: Result:=Original;
    else Result:=TokenName[TokenType];
  end;
end;

{ TScanner }
procedure TScanner.RaiseError(AHead: Integer;const AMessage: string);
var
  Col, Row: Integer;
begin
  GetTextRowAndColumn(Code, AHead, Row, Col);
  raise EScanError.New(Row, Col, AMessage);
end;

procedure TScanner.SetCode(const ACode: string);
begin
  if ACode=FCode then Exit;
  FCode:=ACode;
  Reset;
end;

procedure TScanner.NewToken;
begin
  FToken.TokenType:=ttInvalid;
end;

constructor TScanner.Create(AOwner: TOwnable);
begin
  inherited Create(AOwner);
  Reset;
end;

procedure TScanner.Reset;
begin
  Head:=1;
  Len:=Length(Code);
  NewToken;
end;

function TScanner.HasMore: Boolean;
begin
  Result:=Head <= Len;
end;

function TScanner.NextChar: Char;
begin
  if Head <= Len then Result:=Code[Head] else Result:=#0;
end;

procedure TScanner.Skip(Count: Integer);
begin
  Head:=Head + Count;
  if Head > Len then Head:=Len;
end;

procedure TScanner.SkipSpaces;
begin
  while NextChar in [' ', #9, ''''] do begin
    if NextChar='''' then begin
      while HasMore and not AtEndOfLine do Inc(Head);
    end else Inc(Head);
  end;
end;

function TScanner.AtEndOfLine: Boolean;
begin
  Result:=NextChar in [#10, #13];
end;

function TScanner.Scan: Boolean;
var
  TokenHead: Integer;

  procedure ScanString;
  begin
    Inc(Head);
    if not HasMore then RaiseError(Head, emUnexpectedEndOfCode);
    FToken.TokenType:=ttString;
    FToken.StrValue:='';
    while HasMore do begin
      if NextChar='"' then begin
        Inc(Head);
        if HasMore and (NextChar='"') then begin
          Inc(Head);
          FToken.StrValue:=FToken.StrValue + '"';
          Continue;
        end;
        Exit;
      end;
      FToken.StrValue:=FToken.StrValue + NextChar;
      Inc(Head);
    end;
  end;

  procedure ScanNumber;

    function ScanDigitSeries: string;
    begin
      Result:='';
      while HasMore and (NextChar in ['0'..'9']) do begin
        Result:=Result + NextChar;
        Inc(Head);
      end;
    end;

  begin
    FToken.TokenType:=ttInteger;
    FToken.StrValue:=ScanDigitSeries;
    if HasMore and (NextChar='.') then begin
      Inc(Head);
      FToken.StrValue:=FToken.StrValue + '.' + ScanDigitSeries;
      try
        FToken.RealValue:=StrToFloat(FToken.StrValue, FS);
      except
        RaiseError(TokenHead, emFmt(emInvalidRealNumber, FToken.StrValue));
      end;
      FToken.TokenType:=ttReal;
    end else begin
      try
        FToken.IntValue:=StrToInt64(FToken.StrValue);
      except
        RaiseError(TokenHead, emFmt(emInvalidIntegerNumber, FToken.StrValue));
      end;
    end;
  end;

  procedure ScanWord;
  var
    TT: TTokenType;
  begin
    FToken.StrValue:='';
    while HasMore and (NextChar in ['a'..'z', 'A'..'Z', '0'..'9', '_']) do begin
      FToken.StrValue:=FToken.StrValue + NextChar;
      Inc(Head);
    end;

    FToken.Original:=FToken.StrValue;
    FToken.StrValue:=UpperCase(FToken.StrValue);
    for TT:=Succ(ttKnownWordsBegin) to Pred(ttKnownSymbolsBegin) do
      if UpperCaseTokenName[TT]=FToken.StrValue then begin
        FToken.TokenType:=TT;
        Exit;
      end;
    FToken.TokenType:=ttIdentifier;
  end;

  procedure ScanSymbol;
  var
    TT: TTokenType;
    Ch: Char;
  begin
    Ch:=NextChar;
    Inc(Head);
    for TT:=Succ(ttKnownSymbolsBegin) to High(TTokenType) do begin
      if Ch=TokenName[TT] then begin
        if (Ch='<') and (NextChar='=') then FToken.TokenType:=ttLessEqual
        else if (Ch='>') and (NextChar='=') then FToken.TokenType:=ttGreatEqual
        else if (Ch='<') and (NextChar='>') then FToken.TokenType:=ttInequal
        else FToken.TokenType:=TT;
        if FToken.TokenType in [ttLessEqual, ttGreatEqual, ttInequal] then Inc(Head);
        Exit;
      end;
    end;
    RaiseError(TokenHead, emFmt(emUnknownSymbol, Ch));
  end;

begin
  SkipSpaces;
  FToken.TokenType:=ttInvalid;
  PeekValid:=False;
  if not HasMore then Exit(False);

  TokenHead:=Head;
  FToken.Position:=Head;
  if AtEndOfLine then begin
    FToken.TokenType:=ttEndOfLine;
    while NextChar in [#10, #13] do Inc(Head);
  end else if NextChar='"' then
    ScanString
  else if NextChar in ['0'..'9'] then
    ScanNumber
  else if NextChar in ['a'..'z', 'A'..'Z', '_'] then
    ScanWord
  else
    ScanSymbol;
  Exit(True);
end;

function TScanner.Peek: TToken;
var
  SaveHead: Integer;
  SaveToken: TToken;
begin
  if not PeekValid then begin
    SaveHead:=Head;
    SaveToken:=FToken;
    Scan;
    FPeek:=FToken;
    Head:=SaveHead;
    FToken:=SaveToken;
    PeekValid:=True;
  end;
  Result:=FPeek;
end;

initialization
  FS:=DefaultFormatSettings;
  FS.DecimalSeparator:='.';
  InitializeUpperCaseTokenNames;
end.
