CSV文件解析类

                            遇到CSV文件解析的问题,我写了一个解析的DataSet类,把CSV文件看成是数据库的表的形式表现出来。效率远远高于EXCEL,65536x65536的表,EXCEl要用上好几秒,这个基本感觉不到延迟
{=================================================================}
{                                                                 }
{                                                                 }
{       CSV DataSet                                               }
{                                                                 }
{                                                                 }
{                                                                 }
{      把CSV文件当成是数据表的DataSet                             }
{                                                                 }
{                                                                 }
{                                                                 }
{    wr960204(王锐  2003/12/2)                                    }
{    QQ:42088303                                                  }
{                                                                 }
{=================================================================}

unit UnitCSVDataSet;

interface

uses
  DB, Classes;

const
  MaxStrLen         = 240;

type
  PRecInfo = ^TRecInfo;
  TRecInfo = packed record
    Bookmark: Integer;
    BookmarkFlag: TBookmarkFlag;
  end;

type
  { TCSVStringList}
  {
    原来的TStringList不行,因为遇到回车他就会把他当成是换行符号
    但是CSV文件的单元格内可以存在回车,因此从TStringList上派生一个类
    覆盖掉SetTextStr的方法
  }
  TCSVStringList = class(TStringList)
  private

  protected
    procedure SetTextStr(const Value: string); override;
  public

  end;

  { TCustomCSVDataSet }

  TCustomCSVDataSet = class(TDataSet)
  private
    FAutoSaveToFile: Boolean;
    FData: TStrings;
    FRecBufSize: Integer;
    FRecInfoOfs: Integer;
    FCurRec: Integer;
    FFileName: string;
    FLastBookmark: Integer;
    FSaveChanges: Boolean;
    FFirstLineAsSchema: Boolean;
    FRecordSize: Integer;
    FSchemaFile: string;
    FSchemaLine: string;
    FFileMustExist: Boolean;
    procedure SetSchemaFile(Value: string);
    procedure SetFileMustExist(Value: Boolean);
    procedure SetFirstLineAsSchema(Value: Boolean);
    //跳过没有内容的行
    procedure RemoveWhiteLines(List: TStrings; IsFileRecord: Boolean);
    procedure SetFileName(const Value: string);
  protected
    { Overriden abstract methods (required) }
    function AllocRecordBuffer: PChar; override;
    procedure FreeRecordBuffer(var Buffer: PChar); override;
    procedure GetBookmarkData(Buffer: PChar; Data: Pointer); override;
    function GetBookmarkFlag(Buffer: PChar): TBookmarkFlag; override;
    function GetRecord(Buffer: PChar; GetMode: TGetMode; DoCheck: Boolean):
      TGetResult; override;
    function GetRecordSize: Word; override;
    procedure InternalAddRecord(Buffer: Pointer; Append: Boolean); override;
    procedure InternalClose; override;
    procedure InternalDelete; override;
    procedure InternalFirst; override;
    procedure InternalGotoBookmark(Bookmark: Pointer); override;
    procedure InternalHandleException; override;
    procedure InternalInitFieldDefs; override;
    procedure InternalInitRecord(Buffer: PChar); override;
    procedure InternalLast; override;
    procedure InternalOpen; override;
    procedure InternalPost; override;
    procedure InternalSetToRecord(Buffer: PChar); override;
    function IsCursorOpen: Boolean; override;
    procedure SetBookmarkFlag(Buffer: PChar; Value: TBookmarkFlag); override;
    procedure SetBookmarkData(Buffer: PChar; Data: Pointer); override;
    procedure SetFieldData(Field: TField; Buffer: Pointer); override;
  protected
    { Additional overrides (optional) }
    function GetRecordCount: Integer; override;
    function GetRecNo: Integer; override;
    procedure SetRecNo(Value: Integer); override;

    property SchemaFile: string read FSchemaFile write SetSchemaFile;
    property FileMustExist: Boolean read FFileMustExist write SetFileMustExist;
    property FirstLineAsSchema: Boolean read FFirstLineAsSchema write
      SetFirstLineAsSchema;
  public

    function GetFieldData(Field: TField; Buffer: Pointer): Boolean; override;
    constructor Create(AOwner: TComponent); override;
  published
    property AutoSaveToFile: Boolean read FAutoSaveToFile write FAutoSaveToFile;
    property FileName: string read FFileName write SetFileName;
    property Active;
  end;

  { TCSVDataSet }

  TCSVDataSet = class(TCustomCSVDataSet)
  protected
    { Overriden abstract methods }
    procedure InternalOpen; override;
    function GetRecordSize: Word; override;
    function GetRecord(Buffer: PChar; GetMode: TGetMode; DoCheck: Boolean)
      : TGetResult; override;
    function GetRecordCount: Integer; override;

  public
    constructor Create(Owner: TComponent); override;
    procedure RemoveBlankRecords;
    procedure RemoveExtraColumns;
    procedure SaveFileAs(strFileName: string);
    function GetFieldData(Field: TField; Buffer: Pointer): Boolean; override;
    procedure CreateDataSet;

  published

    property BeforeOpen;
    property AfterOpen;
    property BeforeClose;
    property AfterClose;
    property BeforeInsert;
    property AfterInsert;
    property BeforeEdit;
    property AfterEdit;
    property BeforePost;
    property AfterPost;
    property BeforeCancel;
    property AfterCancel;
    property BeforeDelete;
    property AfterDelete;
    property BeforeScroll;
    property AfterScroll;
    property BeforeRefresh;
    property AfterRefresh;
    property OnCalcFields;
    property OnDeleteError;
    property OnEditError;
    property OnFilterRecord;
    property OnNewRecord;
    property OnPostError;

    property FieldDefs;
    property FileMustExist;
    property FirstLineAsSchema;
  end;

implementation

uses Windows, SysUtils, Forms, Dialogs;

const
  DELIMITERS_GAP    = 4;

  { TCustomCSVDataSet }

procedure TCustomCSVDataSet.InternalOpen;
var
  I                 : Integer;
begin
  //FData := TStringList.Create;
  FData := TCSVStringList.Create;
  FData.LoadFromFile(FileName);
  if FData.Count = 0 then
    FData.Add('');
  ///====================
  if FFirstLineAsSchema then
  begin
    FSchemaLine := FData[0];
    FData.Delete(0);
  end;
  RemoveWhiteLines(FData, True);
  ///====================
  for I := 1 to FData.Count do
    FData.Objects[I - 1] := Pointer(I);
  FLastBookmark := FData.Count;
  FCurRec := -1;

  FRecInfoOfs := MaxStrLen;

  FRecBufSize := FRecInfoOfs + SizeOf(TRecInfo);

  BookmarkSize := SizeOf(Integer);

  InternalInitFieldDefs;

  if DefaultFields then
    CreateFields;

  BindFields(True);
end;

procedure TCustomCSVDataSet.InternalClose;
begin
  if (FSaveChanges) and (FAutoSaveToFile) then
  begin
    if FFirstLineAsSchema then
      FData.Insert(0, FSchemaLine);
    FData.SaveToFile(FileName);
  end;
  FData.Free;
  FData := nil;

  if DefaultFields then
    DestroyFields;

  FLastBookmark := 0;
  FCurRec := -1;
end;

function TCustomCSVDataSet.IsCursorOpen: Boolean;
begin
  Result := Assigned(FData);
end;

procedure TCustomCSVDataSet.InternalInitFieldDefs;

  function _GetField(const AName: string): TFieldDef;
  var
    I               : Integer;
  begin
    Result := nil;
    for I := 0 to FieldDefs.Count - 1 do
    begin
      if FieldDefs[I].Name = AName then
      begin
        Result := FieldDefs[I];
        Exit;
      end;
    end;
  end;

  function _GetFieldName(const AName: string): string;
  var
    I               : Integer;
  begin
    Result := AName;
    I := 0;
    while _GetField(Result) <> nil do
    begin
      Result := AName;
      Result := Format('Name_%s_%d', [AName, I]);
      Inc(I);
    end;
  end;

var
  i, len            : Integer;
  UseSchema         : Boolean;
  LstFields         : TStrings;
  tmpSchema         : TStrings;
  tmpLen            : Integer;
  tmpFieldName      : string;
begin
  FieldDefs.Clear;

  FRecordSize := 0;

  for i := 0 to FData.Count - 1 do
  begin
    len := Length(FData.Strings[i]);
    if len > FRecordSize then
      FRecordSize := len;
  end;

  if not Assigned(FData) then
    exit;

  LstFields := TStringList.Create;
  tmpSchema := TStringList.Create;

  // Load Schema Structure
  if (SchemaFile <> '') then
  begin
    tmpSchema.LoadFromFile(SchemaFile);
    RemoveWhiteLines(tmpSchema, FALSE);
    if (tmpSchema.Count > 0) then
      if StrScan(PChar(tmpSchema.Strings[0]), ',') <> nil then
      begin
        tmpSchema.CommaText := tmpSchema.Strings[0];
        RemoveWhiteLines(tmpSchema, FALSE);
      end;
  end
  else
    //if (FData.Count > 0) then
    //===============================
    if FFirstLineAsSchema then
    begin
      tmpSchema.CommaText := FSchemaLine;
    end
    else
    begin
      tmpSchema.CommaText := FData.Strings[0];
    end;
  //===============================
  UseSchema := (tmpSchema.Count > 0);

  if ((not UseSchema) and ((FirstLineAsSchema) or (SchemaFile <> ''))) then
  begin
    FFirstLineAsSchema := FALSE;
    FSchemaFile := '';
  end;

  // Interpret Schema
  i := 1;

  tmpLen := FRecordSize;

  repeat
    // Standardize variables on schema

    if not UseSchema then
      tmpFieldName := Format('Field%d=%d', [i, tmpLen])
    else
    begin
      tmpFieldName := tmpSchema.Names[i - 1];
      if (tmpFieldName = '') then
        tmpFieldName := Format('%s=%d', [tmpSchema.Strings[i - 1], tmpLen])
      else
        tmpFieldName := tmpSchema.Strings[i - 1];
    end;

    LstFields.Add(tmpFieldName);

    Inc(i)

  until i > tmpSchema.Count;

  tmpSchema.Free;
  FRecordSize := 0;

  // Add fields
  with LstFields do
    for i := 0 to Count - 1 do
    begin
      tmpFieldName := Names[i];
      len := StrToInt(Values[tmpFieldName]);
      if Len < 16 then
        Len := 16;
      tmpFieldName := _GetFieldName(tmpFieldName);
      FieldDefs.Add(tmpFieldName, ftString, len, False);
      Inc(FRecordSize, len);
      Inc(FRecordSize, DELIMITERS_GAP);
    end;

  LstFields.Free;

  if FRecordSize = 0 then
    FRecordSize := MAXSTRLEN;

  FRecInfoOfs := FRecordSize;
  FRecBufSize := FRecInfoOfs + SizeOf(TRecInfo);

end;

procedure TCustomCSVDataSet.InternalHandleException;
begin
  Application.HandleException(Self);
end;

procedure TCustomCSVDataSet.InternalGotoBookmark(Bookmark: Pointer);
var
  Index             : Integer;
begin
  Index := FData.IndexOfObject(TObject(PInteger(Bookmark)^));
  if Index <> -1 then
    FCurRec := Index
  else
    DatabaseError('Bookmark not found');
end;

procedure TCustomCSVDataSet.InternalSetToRecord(Buffer: PChar);
begin
  InternalGotoBookmark(@PRecInfo(Buffer + FRecInfoOfs).Bookmark);
end;

function TCustomCSVDataSet.GetBookmarkFlag(Buffer: PChar): TBookmarkFlag;
begin
  Result := PRecInfo(Buffer + FRecInfoOfs).BookmarkFlag;
end;

procedure TCustomCSVDataSet.SetBookmarkFlag(Buffer: PChar; Value:
  TBookmarkFlag);
begin
  PRecInfo(Buffer + FRecInfoOfs).BookmarkFlag := Value;
end;

procedure TCustomCSVDataSet.GetBookmarkData(Buffer: PChar; Data: Pointer);
begin
  PInteger(Data)^ := PRecInfo(Buffer + FRecInfoOfs).Bookmark;
end;

procedure TCustomCSVDataSet.SetBookmarkData(Buffer: PChar; Data: Pointer);
begin
  PRecInfo(Buffer + FRecInfoOfs).Bookmark := PInteger(Data)^;
end;

function TCustomCSVDataSet.GetRecordSize: Word;
begin
  Result := MaxStrLen;
end;

function TCustomCSVDataSet.AllocRecordBuffer: PChar;
begin
  GetMem(Result, FRecBufSize);
end;

procedure TCustomCSVDataSet.FreeRecordBuffer(var Buffer: PChar);
begin
  FreeMem(Buffer, FRecBufSize);
end;

function TCustomCSVDataSet.GetRecord(Buffer: PChar; GetMode: TGetMode;
  DoCheck: Boolean): TGetResult;
begin
  if FData.Count < 1 then
    Result := grEOF
  else
  begin
    Result := grOK;
    case GetMode of
      gmNext:
        if FCurRec >= RecordCount - 1 then
          Result := grEOF
        else
          Inc(FCurRec);
      gmPrior:
        if FCurRec <= 0 then
          Result := grBOF
        else
          Dec(FCurRec);
      gmCurrent:
        if (FCurRec < 0) or (FCurRec >= RecordCount) then
          Result := grError;
    end;
    if Result = grOK then
    begin
      StrLCopy(Buffer, PChar(FData[FCurRec]), MaxStrLen);
      with PRecInfo(Buffer + FRecInfoOfs)^ do
      begin
        BookmarkFlag := bfCurrent;
        Bookmark := Integer(FData.Objects[FCurRec]);
      end;
    end
    else
      if (Result = grError) and DoCheck then
        DatabaseError('No Records');
  end;
end;

procedure TCustomCSVDataSet.InternalInitRecord(Buffer: PChar);
begin
  FillChar(Buffer^, RecordSize, 0);
end;

function TCustomCSVDataSet.GetFieldData(Field: TField; Buffer: Pointer):
  Boolean;
begin
  StrLCopy(Buffer, ActiveBuffer, Field.Size);
  Result := PChar(Buffer)^ <> #0;
end;

procedure TCustomCSVDataSet.SetFieldData(Field: TField; Buffer: Pointer);
var
  Temp              : TStrings;
  i                 : Integer;
begin
  Temp := TStringList.Create;
  Temp.CommaText := ActiveBuffer;

  for i := Temp.Count to Field.FieldNo - 1 do
    Temp.Add('');

  Temp.Strings[Field.FieldNo - 1] := Copy(PChar(Buffer), 1, Field.DataSize);

  StrLCopy(ActiveBuffer, PChar(Temp.CommaText), FRecordSize);
  DataEvent(deFieldChange, Longint(Field));

  Temp.Free;
end;

procedure TCustomCSVDataSet.InternalFirst;
begin
  FCurRec := -1;
end;

procedure TCustomCSVDataSet.InternalLast;
begin
  FCurRec := FData.Count;
end;

procedure TCustomCSVDataSet.InternalPost;
begin
  FSaveChanges := True;
  if State = dsEdit then
    FData[FCurRec] := ActiveBuffer
  else
  begin
    Inc(FLastBookmark);
    //FData.InsertObject(FCurRec, ActiveBuffer, Pointer(FLastBookmark));
    FData.InsertObject(RecordCount, ActiveBuffer, Pointer(FLastBookmark));
  end;
end;

procedure TCustomCSVDataSet.InternalAddRecord(Buffer: Pointer; Append:
  Boolean);
begin
  FSaveChanges := True;
  Inc(FLastBookmark);
  if Append then
    InternalLast;
  FData.InsertObject(FCurRec, PChar(Buffer), Pointer(FLastBookmark));
end;

procedure TCustomCSVDataSet.InternalDelete;
begin
  FSaveChanges := True;
  FData.Delete(FCurRec);
  if FCurRec >= FData.Count then
    Dec(FCurRec);
end;

function TCustomCSVDataSet.GetRecordCount: Longint;
begin
  Result := FData.Count;
end;

function TCustomCSVDataSet.GetRecNo: Longint;
begin
  UpdateCursorPos;
  if (FCurRec = -1) and (RecordCount > 0) then
    Result := 1
  else
    Result := FCurRec + 1;
end;

procedure TCustomCSVDataSet.SetRecNo(Value: Integer);
begin
  if (Value >= 0) and (Value < FData.Count) then
  begin
    FCurRec := Value - 1;
    Resync([]);
  end;
end;

{ TcsvTextDataSet }

constructor TCSVDataSet.Create(Owner: TComponent);
begin
  inherited Create(Owner);
  FFileMustExist := True;
  FFirstLineAsSchema := True;
end;

procedure TCustomCSVDataSet.SetFileMustExist(Value: Boolean);
begin
  if ((Active) or (FFileMustExist = Value)) then
    exit;

  FFileMustExist := Value;
end;

procedure TCustomCSVDataSet.SetFirstLineAsSchema(Value: Boolean);
begin
  if (FFirstLineAsSchema = Value) then
    Exit;
  if (Active) then
  begin
    if csDesigning in ComponentState then
    begin
      ShowMessage('请先把数据及关闭才能够设置FirstLineAsSchema属性!');
    end;
    Exit;
  end;
  FFirstLineAsSchema := Value;

  if FFirstLineAsSchema then
    FSchemaFile := '';
end;

procedure TCustomCSVDataSet.SetSchemaFile(Value: string);
begin
  if ((Active) or (FSchemaFile = Value)) then
    exit;

  FSchemaFile := Value;

  if (FSchemaFile <> '') then
    FFirstLineAsSchema := FALSE;
end;

procedure TCustomCSVDataSet.RemoveWhiteLines(List: TStrings; IsFileRecord:
  Boolean);
var
  i                 : integer;
begin
  for i := List.Count - 1 downto 0 do
    if (Trim(List.Strings[i]) = '') then
      if IsFileRecord then
      begin

        FCurRec := i;
        InternalDelete;
      end
      else
        List.Delete(i);
end;

procedure TCSVDataSet.RemoveBlankRecords;
begin
  RemoveWhiteLines(FData, TRUE);
end;

procedure TCSVDataSet.RemoveExtraColumns;
var
  i                 : Integer;
  Temp              : TStrings;
begin
  Temp := TStringList.Create;

  for i := 1 to FData.Count do
  begin
    Temp.CommaText := FData.Strings[i - 1];
    if Temp.Count > FieldDefs.Count then // Remove columns at the end
    begin
      while Temp.Count > FieldDefs.Count do
        Temp.Delete(Temp.Count - 1);

      FData.Strings[i - 1] := Temp.CommaText;
    end;
  end;

  Temp.Free;

  FData.SaveToFile(FileName);
end;

procedure TCSVDataSet.SaveFileAs(strFileName: string);
begin
  if FFirstLineAsSchema then
    FData.insert(0, FSchemaLine);
  FData.SaveToFile(strFileName);
  inherited FileName := strFileName;
end;

procedure TCSVDataSet.InternalOpen;
var
  Stream            : TStream;
begin
  if (not FileMustExist) and (not FileExists(FileName)) then
  begin
    Stream := TFileStream.Create(FileName, fmCreate);
    Stream.Free;
  end;

  inherited InternalOpen;
end;

function TCSVDataSet.GetRecordSize: Word;
begin
  Result := FRecordSize;
end;

function TCSVDataSet.GetFieldData(Field: TField; Buffer: Pointer): Boolean;
var
  Temp              : TStrings;
begin
  if FData.Count = 0 then
    Result := FALSE
  else
  begin
    Temp := TStringList.Create;
    Temp.CommaText := ActiveBuffer;

    if ((Field.FieldNo > 0) and (Field.FieldNo <= Temp.Count)) then
      StrLCopy(PChar(Buffer), PChar(Temp[Field.FieldNo - 1]), Field.DataSize)
    else
      StrCopy(PChar(Buffer), #0);

    Temp.Free;

    Result := PChar(Buffer)^ <> #0;
  end;
end;


function TCSVDataSet.GetRecord(Buffer: PChar; GetMode: TGetMode;
  DoCheck: Boolean): TGetResult;
begin
  Result := grOk;

  {if (FirstLineAsSchema) then // Avoid showing titles when FirstLineAsSchema
    case GetMode of
      gmNext:
        if FCurRec >= RecordCount - 1 then
          Result := grEOF
        else
          if FCurRec < 1 then
            FCurRec := 0;
      gmPrior:
        if FCurRec <= 1 then
          Result := grBOF;
    end;
    }
  if (Result = grOk) then
    Result := inherited GetRecord(Buffer, GetMode, DoCheck);

end;

procedure TCustomCSVDataSet.SetFileName(const Value: string);
begin
  if FFileName = Value then
    Exit;
  if Active then
  begin
    if csDesigning in Self.ComponentState then
    begin
      ShowMessage('必须先将数据集关闭才能设置FileName属性!');
    end;
    Exit;
  end;
  FFileName := Value;
end;

constructor TCustomCSVDataSet.Create(AOwner: TComponent);
begin
  inherited Create(AOwner);
  FAutoSaveToFile := False;
end;

{ TCSVStringList }

procedure TCSVStringList.SetTextStr(const Value: string);
var
  DoubleInver       : Boolean;          //引号是否是双数
  P, Start          : PChar;
  S                 : string;
begin
  BeginUpdate;
  try
    Clear;
    P := Pointer(Value);
    if P <> nil then
      while P^ <> #0 do
      begin
        Start := P;
        DoubleInver := True;
        while True do
        begin
          case P^ of
            '"':                        //遇到引号做个但双数的标识
              DoubleInver := not DoubleInver;
            #0:
              break;
            #10: //回车,如果引号是双数就说明是转换到下一行,单数就是单元格内的回车
              begin
                if DoubleInver then
                  Break;
              end;
          end;
          Inc(P);
        end;
        if (P - 1)^ = #13 then
          SetString(S, Start, P - Start - 1)
        else
          SetString(S, Start, P - Start);
        Add(S);
        if P^ = #10 then
          Inc(P);
      end;
  finally
    EndUpdate;
  end;
end;

procedure TCSVDataSet.CreateDataSet;
var
  I                 : Integer;
  DBFile            : TStrings;
  AFieldNams        : string;
begin
  if (Self.Active) then
  begin
    if csDesigning in Self.ComponentState then
      ShowMessage('数据源不能是活动的!');
    Exit;
  end;
  if (FFileName = '') then
  begin
    ShowMessage('必须指定文件名!');
    Exit;
  end;
  if FieldDefs.Count = 0 then
  begin
    ShowMessage('FieldDefs是空的!');
    Exit;
  end;
  DBFile := TCSVStringList.Create;
  try
    AFieldNams := '';
    for I := 0 to FieldDefs.Count - 1 do
    begin
      if I = 0 then
        AFieldNams := FieldDefs[i].Name
      else
        AFieldNams := AFieldNams + ',' + FieldDefs[i].Name;
    end;
    DBFile.Add(AFieldNams);
    DBFile.SaveToFile(FFileName);
    FieldDefs.Clear;
  finally
    DBFile.Free;
  end;
  Active := True;
end;

function TCSVDataSet.GetRecordCount: Integer;
begin
  //if FFirstLineAsSchema then
  //  Result := FData.Count - 1
  //else
  Result := inherited GetRecordCount;
end;

end. 
                                

查看回复