
{$MODE OBJFPC}{$H+}

uses
  SysUtils, RegExpr;

(* ================================================================== *)

// Reads the whole file AFileName into a string, byte for byte.
//
// AFileName: path of the file to read.
// Returns:   the raw file contents; an empty string for an empty file.
// I/O failures surface through the RTL's normal I/O error handling
// (an exception when SysUtils is in use and I/O checking is on).
function ReadText(const AFileName: string): string;
var
  f: file;
  LSize: Int64;
  LSavedMode: Byte;
begin
  result := '';
  Assign(f, AFileName);

  // Reset honours the global FileMode, which defaults to read/write and
  // therefore fails on read-only files. Open read-only, then restore the
  // previous value so other code is not affected.
  LSavedMode := FileMode;
  FileMode := 0;
  try
    Reset(f, 1); // record size 1: counts below are in bytes
  finally
    FileMode := LSavedMode;
  end;

  try
    LSize := FileSize(f);
    SetLength(result, LSize);
    // result[1] does not exist for an empty file, so skip the read then.
    if LSize > 0 then
      BlockRead(f, result[1], LSize);
  finally
    // Always release the handle, even if the read failed.
    Close(f);
  end;
end;

// Writes AText to the file AFileName, creating or truncating it.
//
// AFileName: path of the file to (over)write.
// AText:     bytes to store; an empty string produces an empty file.
// I/O failures surface through the RTL's normal I/O error handling
// (an exception when SysUtils is in use and I/O checking is on).
procedure WriteText(const AFileName, AText: string);
var
  f: file;
  LByteCount: SizeInt;
begin
  Assign(f, AFileName);
  Rewrite(f, 1); // record size 1: the count below is in bytes
  try
    LByteCount := Length(AText) * SizeOf(char);
    // AText[1] does not exist for an empty string, so skip the write then.
    if LByteCount > 0 then
      BlockWrite(f, AText[1], LByteCount);
  finally
    // Always release the handle, even if the write failed.
    Close(f);
  end;
end;

(* ================================================================== *)

type
  // Removes every match of a regular expression from a text and logs each
  // removed match to the error output.
  TTextCleaner = class
    // Replacement callback: logs the current match, substitutes nothing.
    function ReplaceFunc(AExpr: TRegExpr): string;
    // Returns AText with every match of the pattern AExpr removed.
    function CleanText(const AText, AExpr: string): string;
  end;

// Called by TRegExpr.Replace for each match found.
//
// AExpr:   the regex object, positioned on the current match.
// Returns: always the empty string, so the matched text is dropped.
function TTextCleaner.ReplaceFunc(AExpr: TRegExpr): string;
begin
  // Trace the pattern and the text being removed on the error output.
  WriteLn(ErrOutput, 'DEBUG ', AExpr.Expression, ' <<', AExpr.Match[0], '>>');
  result := '';
end;

// Runs the pattern AExpr over AText, passing every match to ReplaceFunc.
//
// AText:   input text.
// AExpr:   regular expression source.
// Returns: the text after all replacements.
function TTextCleaner.CleanText(const AText, AExpr: string): string;
var
  LMatcher: TRegExpr;
begin
  LMatcher := TRegExpr.Create(AExpr);
  try
    Exit(LMatcher.Replace(AText, @ReplaceFunc));
  finally
    // The finally section still runs when leaving through Exit.
    LMatcher.Free;
  end;
end;

(* ================================================================== *)

// Self-test helper: loads the file named by CSource, removes every
// parenthesised group from it and prints the result to standard output.
procedure Demo;
const
  // Name of this source file, inserted by the compiler at compile time.
  CSource = {$I %FILE%};
var
  LCleaner: TTextCleaner;
  LContent: string;
begin
  LContent := ReadText(CSource);

  LCleaner := TTextCleaner.Create;
  try
    // Drop "(" ... ")" spans that contain no closing parenthesis inside.
    LContent := LCleaner.CleanText(LContent, '\([^)]*\)');
    WriteLn(LContent);
  finally
    LCleaner.Free;
  end;
end;

(* ================================================================== *)

var
  LText: string;
  LSource, LDest: TFileName;

// Program entry point.
//
// Usage: cleanhtml FILE_IN FILE_OUT
// Reads FILE_IN, collapses each run of whitespace that ends in a line
// feed into a single line feed, and writes the result to FILE_OUT.
// ExitCode is set to 1 when the arguments are wrong or FILE_IN is missing.
begin
  if ParamCount <> 2 then
  begin
    WriteLn('Usage: cleanhtml FILE_IN FILE_OUT');
    ExitCode := 1;
  end
  else if not FileExists(ParamStr(1)) then
  begin
    // Name the actual problem instead of printing only the usage text.
    WriteLn(ErrOutput, 'Error: input file not found: ', ParamStr(1));
    WriteLn('Usage: cleanhtml FILE_IN FILE_OUT');
    ExitCode := 1;
  end
  else
  begin
    LSource := ParamStr(1);
    LDest   := ParamStr(2);

    LText := ReadText(LSource);

    // Optional tag/comment stripping passes. All are disabled at present,
    // so this block only creates and frees the cleaner.
    with TTextCleaner.Create do
    try
      //LText := CleanText(LText, '<script.+?</script>');
      //LText := CleanText(LText, '/\*.+?\*/');
      //LText := CleanText(LText, '<!--.+?-->');
      //LText := CleanText(LText, '<style.+?</style>');
      //LText := CleanText(LText, '<a class="quickedit" .+?</a>');
      //LText := CleanText(LText, '<noscript>.+?</noscript>');
    finally
      Free;
    end;

    //LText := ReplaceRegExpr('\x0A{2,}', LText, #10, FALSE);
    //LText := ReplaceRegExpr(' href=\n"', LText, ' href="', FALSE);
    //LText := ReplaceRegExpr(' (class|content|href|media|onclick|property|src|title)=\n"', LText, ' $1="', TRUE);
    //LText := ReplaceRegExpr('<(\w+) class="[^"]+">', LText, '<$1>', TRUE);

    //LText := ReplaceRegExpr('\n</a>', LText, '</a>', FALSE);

    // The only active pass: whitespace followed by a line feed becomes a
    // single line feed.
    LText := ReplaceRegExpr('\s+\n', LText, #10, FALSE);
    //LText := ReplaceRegExpr('\x0A{2,}', LText, #10, FALSE);
    //
    //LText := StringReplace(LText, '<a class="toggle" href="javascript:void(0)"><span>►</span></a> ', '►', [rfReplaceAll]);

    //LText := ReplaceRegExpr(' (class|content|href|media|onclick|property|src|title)=\n"', LText, ' $1="', TRUE);

    WriteText(LDest, LText);
  end;
end.
