一段时间以来,我一直试图从这个html表中获取数据,付费和免费的组件我都试过了。我也自己写了一些代码,但同样没有结果。我有一个类可以把html表直接导入ClientDataSet,但它对这张表不起作用。有人对如何获取这个html表中的数据有什么建议吗?或者有没有办法把它转换为txt / xls / csv或xml?以下是我用来获取该表的代码:
// Load the page. Navigate is asynchronous, so pump messages until the
// document has finished loading before touching the DOM.
WebBrowser1.Navigate('http://site2.aesa.pb.gov.br/aesa/monitoramentoPluviometria.do?metodo=listarMesesChuvasMensais');
while WebBrowser1.ReadyState <> READYSTATE_COMPLETE do
  Application.ProcessMessages;
// Set the first two <select> elements (presumably year and month - confirm
// against the page) and click the second <input> to submit the form.
WebBrowser1.OleObject.Document.All.Tags('select').Item(0).Value:= '2013';
WebBrowser1.OleObject.Document.All.Tags('select').Item(1).Value:= '7';
WebBrowser1.OleObject.Document.All.Tags('input').Item(1).click;
// The click is expected to trigger a new page load; wait for it to finish,
// otherwise the table is read from the old document.
// NOTE(review): handling OnDocumentComplete is the more robust way to wait.
repeat
  Application.ProcessMessages;
until (not WebBrowser1.Busy) and (WebBrowser1.ReadyState = READYSTATE_COMPLETE);
// Dump the markup of the 11th <table> (index 10) to a memo and to disk.
Memo1.Text:= WebBrowser1.OleObject.Document.All.Tags('table').Item(10).InnerHTML;
Memo1.Lines.SaveToFile('table.html');
发布于 2014-07-20 14:16:05
下面将从目标页面的HTML表中提取数据,并将其加载到ClientDataSet中。
代码相当冗长,这或许也印证了David所说的:Delphi可能不是完成这项工作的最佳工具。
在我的Form1上有一个TEdit(edValue),用于输入HTML数据中第一个数据行里的某个值。我用它来在HTML文档中定位目标表。我相信还有更好的办法,但至少这比硬编码地假定表格所嵌入的文档布局要好——那样的假定很可能经不起页面作者对页面的改动。
总的来说,代码的工作方式是:首先用edValue.Text的内容找到对应的HTML单元格,然后找到该单元格所属的表,最后根据表中的数据创建CDS的字段并填充记录。
默认情况下,CDS字段的长度设置为255个字符;网页上也许发布了数据规范,让您可以对某些(甚至全部)字段使用较小的长度。所有字段都假定为ftString类型,以免代码因意外的单元格内容而出错。
顺便说一句,代码底部有一个把HTML页面保存到本地的实用函数,这样就不必每次都点击按钮去选择年份和月份。要从保存的文件重新加载到WebBrowser,只需把文件名作为要加载的URL即可。
TForm1 = class(TForm)
[ ... ]
public
{ Public declarations }
Doc : IHtmlDocument2;
// OnClick handler: finds the HTML table that contains the text typed into
// edValue and loads that table into the CDS dataset.
// Raises Exception when no document is loaded or no matching table exists.
procedure TForm1.btnFindValueClick(Sender: TObject);
var
  Table : IHTMLTable;
begin
  // Document can be nil when no page has been loaded yet; fail with a clear
  // message instead of an access violation further down.
  Doc := WebBrowser1.Document as IHTMLDocument2;
  if Doc = Nil then
    raise Exception.Create('No HTML document is loaded in the browser');

  Table := FindTableByCellValue(edValue.Text);
  // An explicit check is used rather than Assert because assertions can be
  // compiled out, which would pass a nil table on to the loader.
  if Table = Nil then
    raise Exception.CreateFmt('No HTML table contains a cell starting with "%s"',
      [edValue.Text]);

  LoadCDSFromHTMLTable(CDS, Table);
end;
// Builds DestCDS from an HTML table.
//   - DestCDS must be inactive and have no fields defined yet.
//   - An auto-increment 'ID' field is created first, followed by one string
//     field (size cMaxFieldSize) per cell of the table's first row, named
//     after that cell's text.
//   - Every following table row becomes one record.
// Raises Exception when Table is nil or has no rows.
procedure TForm1.LoadCDSFromHTMLTable(DestCDS : TClientDataSet; Table : IHTMLTable);
const
  cMaxFieldSize = 255;
  scIDFieldName = 'ID';
var
  I,
  J,
  RowCount,
  ColCount,
  CellCount : Integer;
  vTable,
  vHeaderCells,
  vCells : OleVariant;
  FieldName,
  FieldValue : String;
  Field : TField;
begin
  if Table = Nil then
    raise Exception.Create('LoadCDSFromHTMLTable: no HTML table supplied');

  // Use OleVariant instead of IHTMLTable because it's less fiddly for doing what follows
  vTable := Table;
  Assert(not DestCDS.Active and (DestCDS.FieldCount = 0));

  // Read the table dimensions once: every access below is a late-bound OLE
  // call, so avoid repeating them inside the loops.
  RowCount := vTable.Rows.Length;
  if RowCount = 0 then
    raise Exception.Create('LoadCDSFromHTMLTable: the HTML table has no rows');
  vHeaderCells := vTable.Rows.Item(0).Cells;
  ColCount := vHeaderCells.Length;

  // First create an AutoInc field
  Field := TAutoIncField.Create(Self);
  Field.FieldName := scIDFieldName;
  Field.DataSet := DestCDS;

  // Next create CDS fields from the names in the cells in the first row of the table
  for I := 0 to ColCount - 1 do begin
    FieldName := Trim(vHeaderCells.Item(I).InnerText);
    // A blank or repeated header text cannot be used as a field name, so
    // fall back to a name derived from the column position.
    if FieldName = '' then
      FieldName := 'Column' + IntToStr(I + 1);
    if DestCDS.FindField(FieldName) <> Nil then
      FieldName := FieldName + '_' + IntToStr(I + 1);
    Field := TStringField.Create(Self);
    Field.FieldName := FieldName;
    Field.Size := cMaxFieldSize;
    Field.DataSet := DestCDS;
  end;

  DestCDS.DisableControls;
  try
    DestCDS.IndexFieldNames := scIDFieldName;
    DestCDS.CreateDataSet;

    // Next load the HTML table data into the CDS
    for I := 1 to RowCount - 1 do begin
      vCells := vTable.Rows.Item(I).Cells;
      // Rows may have fewer cells than the header (e.g. colspan) or more;
      // copy only the cells that have a matching field.
      CellCount := vCells.Length;
      if CellCount > ColCount then
        CellCount := ColCount;

      DestCDS.Insert;
      try
        for J := 0 to CellCount - 1 do begin
          FieldValue := vCells.Item(J).InnerText;
          // the J + 1 is because Fields[0] is the autoinc one
          DestCDS.Fields[J + 1].AsString := FieldValue;
        end;
        DestCDS.Post;
      except
        // Do not leave the dataset stuck in insert state on failure.
        DestCDS.Cancel;
        raise;
      end;
    end;
    DestCDS.First;
  finally
    DestCDS.EnableControls;
  end;
end;
// Scans every element of Doc and returns the first table cell whose trimmed
// inner text starts with the trimmed AValue.
// Returns nil when Doc is nil, AValue is blank, or no such cell exists.
function TForm1.FindTableCellByTagValue(Doc : IHtmlDocument2; const AValue : String) : IHTMLTableCell;
var
  All : IHTMLElementCollection;
  Wanted : String;
  I : Integer;
  E : OleVariant;
  iE : IHTMLElement;
  iC : IHTMLTableCell;
begin
  Result := Nil;
  if Doc = Nil then
    Exit;

  // Trim the search text once rather than on every iteration; with nothing
  // left to search for there is no meaningful match.
  Wanted := Trim(AValue);
  if Wanted = '' then
    Exit;

  All := Doc.All;
  if All = Nil then
    Exit;

  for I := 0 to All.Length - 1 do begin
    E := All.Item(I, varEmpty);
    iE := IDispatch(E) as IHTMLElement;
    // Only table cells are of interest; other elements are skipped.
    if Supports(iE, IHTMLTableCell, iC) then
      // Pos(...) = 1 is a "starts with" test, not an exact comparison.
      if Pos(Wanted, Trim(iE.Get_InnerText)) = 1 then begin
        Result := iC;
        Break;
      end;
  end;
end;
// Returns the HTML table in the form's Doc that contains a cell whose text
// starts with Value, or nil when Doc is not set, no such cell exists, or the
// cell has no table among its ancestors.
function TForm1.FindTableByCellValue(Value : String): IHTMLTable;
var
  Node : IHtmlElement;
  iTable : IHTMLTable;
  iCell : IHTMLTableCell;
begin
  Result := Nil;
  if Doc = Nil then
    Exit;

  // Search for the caller-supplied Value (the parameter was previously
  // ignored in favour of reading edValue.Text directly).
  iCell := FindTableCellByTagValue(Doc, Value);
  if iCell = Nil then
    Exit;

  // if we found a cell with the text we were looking for,
  // we can now find the HTML table to which it belongs by walking up
  // through its ancestors
  Node := (IDispatch(iCell) as IHtmlElement).parentElement;
  while Node <> Nil do begin
    if Supports(Node, IHTMLTable, iTable) then begin
      Result := iTable;
      Break;
    end;
    Node := Node.parentElement;
  end;
end;
// Saves the currently loaded document (the form's Doc) to FileName through
// its IPersistFile interface.
// Raises Exception when no document is loaded or the save fails.
procedure TForm1.SaveFileLocally(const FileName : String);
var
  PFile : IPersistFile; // declared in ActiveX unit
  WideName : WideString;
  SaveResult : HResult;
begin
  if Doc = Nil then
    raise Exception.Create('SaveFileLocally: no HTML document is loaded');

  PFile := Doc as IPersistFile;
  // Keep the wide file name in a managed WideString so it is released
  // automatically; StringToOleStr allocates a string that the caller would
  // have to free explicitly.
  WideName := FileName;
  SaveResult := PFile.Save(PWideChar(WideName), False);
  // Save reports failure through its HRESULT, so check it rather than
  // silently ignoring a failed write.
  if Failed(SaveResult) then
    raise Exception.CreateFmt('Could not save the document to "%s" (HRESULT $%.8x)',
      [FileName, SaveResult]);
end;
发布于 2014-07-21 04:56:40
经过一段时间的研究,我终于从html表格中提取出了数据。简单来说,我可以直接从html表中提取数据而不必"解析"它:目标是'table'标签集合中的第11项(item 11);第10项(item 10)包含相同的数据,但都挤在同一个单元格里。于是我先把表中的每个元素填入StringGrid,后来又找到了通过ClientDataSet直接填充DBGrid的方法。下面贴出代码(整个单元)作为示例,供有需要的人参考。感谢所有在评论中帮助过我的人。进一步研究后我发现,完成这个过程的最佳方式是使用MSHTML。
unit Unit1;
interface
uses
Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.OleCtrls, SHDocVw, Vcl.StdCtrls,
Vcl.Grids, Vcl.DBGrids, Data.DB, Datasnap.DBClient;
type
// Main form: hosts a web browser showing the HTML page and two grids that
// display the table extracted from it.
TForm1 = class(TForm)
// Browser control; FormCreate navigates it to the saved HTML file.
WebBrowser1: TWebBrowser;
// NOTE(review): presumably bound to DataSource1 in the DFM - confirm.
DBGrid1: TDBGrid;
// Filled with the raw cell texts by Button1Click.
StringGrid1: TStringGrid;
// Button1Click copies the HTML table into StringGrid1.
Button1: TButton;
// Button2Click copies the HTML table into ClientDataSet1.
Button2: TButton;
ClientDataSet1: TClientDataSet;
DataSource1: TDataSource;
// NOTE(review): the fields below look like persistent fields of
// ClientDataSet1 defined in the DFM; Button2Click addresses them by field
// name ('Município/Posto', 'Total Mensal (mm)', ...) - confirm the mapping.
ClientDataSet1MunicípioPosto: TStringField;
ClientDataSet1TotalMensalmm: TStringField;
ClientDataSet1ClimatologiaMensalmm: TStringField;
ClientDataSet1Desviomm: TStringField;
ClientDataSet1Desvio: TStringField;
ClientDataSet1id: TAutoIncField;
procedure FormCreate(Sender: TObject);
procedure Button1Click(Sender: TObject);
procedure Button2Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;
var
Form1: TForm1;
implementation
{$R *.dfm}
// Copies the text of every cell of the document's 12th <table> (index 11)
// into StringGrid1, offset by one row and one column, after reporting the
// table's row count and the cell count of its first row.
procedure TForm1.Button1Click(Sender: TObject);
var
  RowIdx, ColIdx: Integer;
  TotalRows, HeaderCols: Integer;
  HtmlRows, RowCells: OleVariant;
begin
  HtmlRows := WebBrowser1.OleObject.Document.all.tags('table').item(11).Rows;

  TotalRows := HtmlRows.Length;
  ShowMessage('Number of Rows: ' + IntToStr(TotalRows));
  HeaderCols := HtmlRows.Item(0).Cells.Length;
  ShowMessage('Number of Cols: ' + IntToStr(HeaderCols));

  // One extra row and column: grid row 0 and column 0 are left untouched.
  StringGrid1.RowCount := TotalRows + 1;
  StringGrid1.ColCount := HeaderCols + 1;

  for RowIdx := 0 to TotalRows - 1 do
  begin
    RowCells := HtmlRows.Item(RowIdx).Cells;
    for ColIdx := 0 to RowCells.Length - 1 do
      StringGrid1.Cells[ColIdx + 1, RowIdx + 1] := RowCells.Item(ColIdx).InnerText;
  end;
end;
// Loads the document's 12th <table> (index 11) into ClientDataSet1: every
// row after the first becomes one record, with the first five cells stored
// in the fields listed in cColumnFields.
procedure TForm1.Button2Click(Sender: TObject);
const
  // Dataset field name for each table column, in column order.
  cColumnFields: array[0..4] of string = (
    'Município/Posto',
    'Total Mensal (mm)',
    'Climatologia Mensal (mm)',
    'Desvio (mm)',
    'Desvio (%)');
var
  iRow : Integer;
  iCol : Integer;
  RowCount : Integer;
  CellCount : Integer;
  ovRows : OleVariant;
  ovCells : OleVariant;
begin
  ovRows := WebBrowser1.OleObject.Document.all.tags('table').item(11).Rows;
  RowCount := ovRows.Length;
  // Row 0 is skipped (it is not loaded as data); nothing to do without
  // further rows.
  if RowCount < 2 then
    Exit;

  // Suspend grid updates while loading; open and index the dataset once
  // instead of once per row.
  ClientDataSet1.DisableControls;
  try
    ClientDataSet1.Open;
    ClientDataSet1.IndexFieldNames := 'id';

    for iRow := 1 to RowCount - 1 do
    begin
      ovCells := ovRows.Item(iRow).Cells;
      CellCount := ovCells.Length;

      ClientDataSet1.Insert;
      try
        // Each field is assigned exactly once; columns missing from a short
        // row are simply left empty.
        for iCol := 0 to High(cColumnFields) do
          if iCol < CellCount then
            ClientDataSet1.FieldByName(cColumnFields[iCol]).AsString :=
              ovCells.Item(iCol).InnerText;
        ClientDataSet1.Post;
      except
        // Do not leave the dataset stuck in insert state on failure.
        ClientDataSet1.Cancel;
        raise;
      end;
    end;

    ClientDataSet1.First;
  finally
    ClientDataSet1.EnableControls;
  end;
end;
// On form creation, point the browser at the locally saved HTML page.
procedure TForm1.FormCreate(Sender: TObject);
const
  SavedPagePath = 'C:\htmlwiththetable.html';
begin
  WebBrowser1.Navigate(SavedPagePath);
end;
end.
https://stackoverflow.com/questions/24838237
复制相似问题