|
![]() |
|
Registriert seit: 5. Nov 2004 193 Beiträge Turbo Delphi für Win32 |
#1
Hatte gerade das gleiche Problem und habe nun provisorisch erst mal Folgendes gebastelt und zusammengesucht:
Delphi-Quellcode:
einfach mit html2txt() aufrufen
const
  // Longest entity that is decoded, counted from '&' through ';'
  // (e.g. '&Ccedil;' or '&#8364;').
  MaxEntityLen = 8;

  // Standard HTML names of the ISO 8859-1 characters 160..255.
  // The array index is the character code the name stands for.
  Latin1EntityNames: array[160..255] of string = (
    'nbsp',   'iexcl',  'cent',   'pound',  'curren', 'yen',    'brvbar', 'sect',
    'uml',    'copy',   'ordf',   'laquo',  'not',    'shy',    'reg',    'macr',
    'deg',    'plusmn', 'sup2',   'sup3',   'acute',  'micro',  'para',   'middot',
    'cedil',  'sup1',   'ordm',   'raquo',  'frac14', 'frac12', 'frac34', 'iquest',
    'Agrave', 'Aacute', 'Acirc',  'Atilde', 'Auml',   'Aring',  'AElig',  'Ccedil',
    'Egrave', 'Eacute', 'Ecirc',  'Euml',   'Igrave', 'Iacute', 'Icirc',  'Iuml',
    'ETH',    'Ntilde', 'Ograve', 'Oacute', 'Ocirc',  'Otilde', 'Ouml',   'times',
    'Oslash', 'Ugrave', 'Uacute', 'Ucirc',  'Uuml',   'Yacute', 'THORN',  'szlig',
    'agrave', 'aacute', 'acirc',  'atilde', 'auml',   'aring',  'aelig',  'ccedil',
    'egrave', 'eacute', 'ecirc',  'euml',   'igrave', 'iacute', 'icirc',  'iuml',
    'eth',    'ntilde', 'ograve', 'oacute', 'ocirc',  'otilde', 'ouml',   'divide',
    'oslash', 'ugrave', 'uacute', 'ucirc',  'uuml',   'yacute', 'thorn',  'yuml'
  );

// Tries to decode one complete entity such as '&amp;', '&auml;' or '&#228;'.
//
//   Entity  - the entity text including the leading '&' and trailing ';'.
//   Decoded - receives the character on success, a blank otherwise.
//
// Returns True when the entity was recognised. Names are compared
// case-sensitively ('&Auml;' and '&auml;' are different characters).
function TryDecodeEntity(const Entity: string; out Decoded: Char): Boolean;
var
  Body: string;
  Code: Integer;
begin
  Result := False;
  Decoded := ' ';

  // Smallest possible entity is '&', one character, ';'.
  if (Length(Entity) < 3) or (Entity[1] <> '&') or
     (Entity[Length(Entity)] <> ';') then
    Exit;
  Body := Copy(Entity, 2, Length(Entity) - 2);

  if Body[1] = '#' then
  begin
    // Numeric character reference. Only codes that fit into Char are
    // accepted; anything else would be silently truncated by the cast.
    if TryStrToInt(Copy(Body, 2, MaxInt), Code) and
       (Code > 0) and (Code <= Ord(High(Char))) then
    begin
      Decoded := Char(Code);
      Result := True;
    end;
    Exit;
  end;

  Result := True;
  if Body = 'quot' then
    Decoded := '"'
  else if Body = 'amp' then
    Decoded := '&'
  else if Body = 'lt' then
    Decoded := '<'
  else if Body = 'gt' then
    Decoded := '>'
  else if Body = 'nbsp' then
    Decoded := ' '          // plain blank, so that Trim() can remove it later
  else if Body = 'brkbar' then
    Decoded := Char(166)    // legacy alias of 'brvbar'
  else if Body = 'hibar' then
    Decoded := Char(175)    // legacy alias of 'macr'
  else
  begin
    Result := False;
    for Code := Low(Latin1EntityNames) to High(Latin1EntityNames) do
      if Latin1EntityNames[Code] = Body then
      begin
        Decoded := Char(Code);
        Result := True;
        Exit;
      end;
  end;
end;

// Returns the character for the HTML entity HCode (including '&' and ';').
// Unknown or malformed entities yield a blank.
function GiveSZ(HCode: string): Char;
var
  Decoded: Char;
begin
  if TryDecodeEntity(HCode, Decoded) then
    Result := Decoded
  else
    Result := ' ';
end;

// Replaces every recognised HTML entity in sValue by its character.
//
// The text is scanned once from left to right, so already decoded output is
// never scanned again ('&amp;lt;' becomes '&lt;', not '<'). A '&' that does
// not start a recognised entity of at most MaxEntityLen characters is copied
// unchanged, as is all text without any '&'.
function ReplaceHTMLChar(sValue: string): string;
var
  scanPos, ampPos, semiPos: Integer;
  decoded: Char;
begin
  Result := '';
  scanPos := 1;
  while scanPos <= Length(sValue) do
  begin
    ampPos := PosEx('&', sValue, scanPos);
    if ampPos = 0 then
      Break;

    // Literal text in front of the '&'.
    Result := Result + Copy(sValue, scanPos, ampPos - scanPos);

    semiPos := PosEx(';', sValue, ampPos + 1);
    if (semiPos > 0) and (semiPos - ampPos < MaxEntityLen) and
       TryDecodeEntity(Copy(sValue, ampPos, semiPos - ampPos + 1), decoded) then
    begin
      Result := Result + decoded;
      scanPos := semiPos + 1;
    end
    else
    begin
      // Stray '&': keep it and continue right behind it, so that a real
      // entity starting shortly afterwards is still found.
      Result := Result + '&';
      scanPos := ampPos + 1;
    end;
  end;

  // Remainder behind the last '&' (or the whole text if there was none).
  Result := Result + Copy(sValue, scanPos, MaxInt);
end;

// Converts HTML to plain text: removes everything between '<' and '>',
// decodes HTML entities, trims every line and drops empty lines.
//
// Tags are removed BEFORE the entities are decoded. Otherwise an escaped
// '&lt;' in the text would turn into '<' and be mistaken for a tag start.
// The result is TStringList.Text, i.e. each kept line ends with a line break.
function Html2Txt(html: string): string;
var
  inTag: Boolean;
  i: Integer;
  ch: Char;
  plain: string;
  lines: TStringList;
begin
  Result := '';
  plain := '';
  inTag := False;

  for i := 1 to Length(html) do
  begin
    ch := html[i];
    if inTag then
    begin
      if ch = '>' then
        inTag := False;
    end
    else if ch = '<' then
      inTag := True
    else
      plain := plain + ch;
  end;

  plain := ReplaceHTMLChar(plain);

  lines := TStringList.Create;
  try
    lines.Text := plain;
    // Walk backwards so deleting a line does not shift the remaining indices.
    for i := lines.Count - 1 downto 0 do
    begin
      lines[i] := Trim(lines[i]);
      if lines[i] = '' then
        lines.Delete(i);
    end;
    Result := lines.Text;
  finally
    lines.Free;
  end;
end;

Damit werden alle html-tags und scripte entfernt, sowie die html-sonderzeichen ersetzt. Ich arbeite immo noch an einer Lösung mit regulären Ausdrücken. Gruß tr909 |
![]() |
Registriert seit: 6. Aug 2012 12 Beiträge Delphi 10.2 Tokyo Professional |
#2
Die Funktion ReplaceHTMLChar machte noch immer Probleme bei nicht abgeschlossenem Tag oder wenn einfach mal so ein "&" vorkam.
Habe das nun komplett neu durchdacht:
Delphi-Quellcode:
// Replaces the HTML entities in sValue by the characters GiveSZ returns.
//
// An entity is the text from a '&' up to the next ';'. If several '&' occur
// before that ';', only the last one is taken as the entity start, so a
// stray '&' in running text is left alone. Candidates longer than 8
// characters are skipped. If anything unexpected raises an exception, the
// unmodified input is returned.
function ReplaceHTMLChar(sValue: string): string;
var
  tagStartPos, tagNxtStartPos: Integer;
  tagEndPos: Integer;
  tag, newTag: string;
  Found: Boolean;
begin
  tagEndPos := 1;
  Result := sValue;
  try
    repeat
      Found := False;
      tagStartPos := PosEx('&', Result, tagEndPos);
      if tagStartPos > 0 then
      begin
        tagEndPos := PosEx(';', Result, tagStartPos);
        Found := (tagEndPos > tagStartPos);
        if Found then
        begin
          // Is there another start character before the end character?
          // If so, move the start forward to the last '&' before the ';'.
          tagNxtStartPos := tagStartPos;
          repeat
            tagNxtStartPos := PosEx('&', Result, tagNxtStartPos + 1);
            if (tagNxtStartPos > 0) and (tagNxtStartPos < tagEndPos) then
              tagStartPos := tagNxtStartPos;
          until (tagNxtStartPos = 0) or (tagNxtStartPos > tagEndPos);

          if tagEndPos - tagStartPos < 8 then
          begin
            tag := Copy(Result, tagStartPos, tagEndPos - tagStartPos + 1);
            newTag := GiveSZ(tag);
            Result := Copy(Result, 1, tagStartPos - 1) + newTag +
                      Copy(Result, tagEndPos + 1, Length(Result) - tagEndPos);
            // Continue BEHIND the inserted character. Resuming on it would
            // treat a decoded '&' (from '&amp;') as the start of a new
            // entity and decode the text a second time.
            tagEndPos := tagStartPos + Length(newTag);
          end
          else
            tagEndPos := tagStartPos + 1;
        end;
      end;
    until not Found;
  except
    // If something unforeseen happens, rather return the input unconverted.
    Result := sValue;
  end;
end;

Bitte Melden, wenn auch damit Probleme entstehen sollten. |
![]() |
Ansicht |
![]() |
![]() |
![]() |
Forumregeln: Es ist dir nicht erlaubt, neue Themen zu verfassen.
Es ist dir nicht erlaubt, auf Beiträge zu antworten.
Es ist dir nicht erlaubt, Anhänge hochzuladen.
Es ist dir nicht erlaubt, deine Beiträge zu bearbeiten.
BB-Code ist an.
Smileys sind an.
[IMG] Code ist an.
HTML-Code ist aus. Trackbacks sind an
Pingbacks sind an
Refbacks sind aus
|
|
Nützliche Links |
Heutige Beiträge |
Sitemap |
Suchen |
Code-Library |
Wer ist online |
Alle Foren als gelesen markieren |
Gehe zu... |
LinkBack |
![]() |
![]() |