Thema: Delphi Unicode search

Einzelnen Beitrag anzeigen

mijack

Registriert seit: 18. Mai 2007
11 Beiträge
 
Delphi 2007 Enterprise
 
#1

Unicode search

  Alt 24. Mai 2007, 16:19
I used this some times ago to find a keyword in some HTML Files
But when i modify it to search for a Unicode keyword ( Word ) it gives an empty result is there any wrong issue with it . ( i use TntSysUtils units )
Delphi-Quellcode:
function ScanFile(const filename: String;
                 const forString: String;
                 caseSensitive: Boolean ): LongInt;
{ returns position of string in file or -1, if not found }
const
 BufferSize= $8001; { 32K+1 bytes }
var
 pBuf, pEnd, pScan, pPos : PWidechar;
 filesize: LongInt;
 bytesRemaining: LongInt;
 bytesToRead: Integer;
 F : File;
 SearchFor: PWidechar;
 oldMode: Word;
begin
 Result := -1; { assume failure }
 if (Length( forString ) = 0) or (Length( filename ) = 0) then
   Exit;
 SearchFor := nil;
 pBuf := nil;

 { open file as binary, 1 byte recordsize }
 AssignFile( F, filename );
 oldMode := FileMode;
 FileMode := 0; { read-only access }
 Reset( F, 1 );
 FileMode := oldMode;
 try { allocate memory for buffer and pchar search string }
   SearchFor := StrAllocW( Length( forString )+1 );
   StrPCopyW( SearchFor, forString );
  if not caseSensitive then { convert to upper case }

  Tnt_WideUpperCase(SearchFor ); //
    // AnsiUpperCase( SearchFor );
   GetMem( pBuf, BufferSize );
   filesize := System.Filesize( F );
   bytesRemaining := filesize;
   pPos := nil;
   while bytesRemaining > 0 do
   begin
     { calc how many bytes to read this round }
     if bytesRemaining >= BufferSize then
       bytesToRead := Pred( BufferSize )
     else
       bytesToRead := bytesRemaining;

     { read a buffer full and zero-terminate the buffer }
     BlockRead(F, pBuf^, bytesToRead, bytesToRead);
     pEnd := @pBuf[ bytesToRead ];
     pEnd^:= #0;
     { scan the buffer. Problem: buffer may contain #0 chars! So we
       treat it as a concatenation of zero-terminated strings. }

     pScan := pBuf;
     while pScan < pEnd do
     begin
      if not caseSensitive then { convert to upper case }
        Tnt_WideUpperCase( pScan );
       pPos := StrPosW( pScan, SearchFor ); { search for substring }
       if pPos <> nil then
       begin { Found it! }
         Result := FileSize - bytesRemaining +
                   LongInt( pPos ) - LongInt( pBuf );
         Break;
       end;
       pScan := StrEndW( pScan );
       Inc( pScan );
     end;
     if pPos <> nil then
       Break;
     bytesRemaining := bytesRemaining - bytesToRead;
     if bytesRemaining > 0 then
     begin
     { no luck in this buffers load. We need to handle the case of
      the search string spanning two chunks of file now. We simply
      go back a bit in the file and read from there, thus inspecting
      some characters twice
     }

       Seek( F, FilePos(F)-Length( forString ));
       bytesRemaining := bytesRemaining + Length( forString );
     end;
   end; { While }
 finally
   CloseFile( F );
   If SearchFor <> nil then
     StrDisposeW( SearchFor );
   If pBuf <> nil then
     FreeMem( pBuf, BufferSize );
 end;
end; { ScanFile }
procedure GetFileList( FileList: TStringList; inDir, Extension : String );
 procedure ProcessSearchRec( aSearchRec : TSearchRecW );
 var
  sDate: String;
 begin
   if ( aSearchRec.Attr and faDirectory ) <> 0 then
   begin
     if ( aSearchRec.Name <> '.' ) and
        ( aSearchRec.Name <> '..' ) then
     begin
       GetFileList( FileList, Extension, InDir + '\' + aSearchRec.Name );
     end;
   end
   else
   begin
     sDate := DateTimeToStr(FileDateToDateTime(aSearchRec.Time));
     FileList.Add(inDir + '\' + aSearchRec.Name);

   end;

 end;

var CurDir : String;
 aSearchRec : TSearchRecW;
begin
 CurDir := inDir + '\*.' + Extension;
 if WideFindFirst( CurDir, faAnyFile, aSearchRec ) = 0 then
 begin
   ProcessSearchRec( aSearchRec );
   while WideFindNext( aSearchRec ) = 0 do
     ProcessSearchRec( aSearchRec );
 end;
 WideFindClose(aSearchRec);

end;



procedure TForm1.GetHTMLFileList(Directory, SearchString: WideString;
  CaseSens: Boolean);
var
 FL: TStringList;
begin
 FL := TStringList.Create;
 FL.Sorted := True;
 GetFileList(FL, Directory, 'HTM*');
 ProcessHTMLFIles(FL, SearchString, CaseSens);
 FL.Free;
end;


procedure TForm1.ProcessHTMLFiles(FileList: TStringList;
  SearchString: WideString; CaseSens: Boolean);
var
 i: Integer;
begin
 for i := 0 to Pred(FileList.Count) do
 begin
   if ScanFile(FileList.Strings[i], SearchString, CaseSens) > 0 then
   begin
     // The result was found
     Memo1.Lines.Add(FileList.Strings[i]); // a memo is TntMemo
   end;
 end;
end;
many thanks
  Mit Zitat antworten Zitat