Install-Package AngleSharp
dotnet add package AngleSharp
paket add AngleSharp
using AngleSharp;
using AngleSharp.Html.Parser;

// Parse an in-memory HTML string and print the text of its <h1> heading.
var context = BrowsingContext.New(Configuration.Default);

// NOTE(review): the single-argument HtmlParser(IBrowsingContext) constructor does not
// appear to be public; the (options, context) overload is used instead - confirm
// against the AngleSharp version in use.
var parser = new HtmlParser(new HtmlParserOptions(), context);

var htmlContent = "<html><body><h1>Hello, World!</h1></body></html>";
var document = await parser.ParseDocumentAsync(htmlContent);

// The markup contains no <title> element, so document.Title would be empty;
// read the heading text instead.
Console.WriteLine(document.QuerySelector("h1")?.TextContent); // Output: Hello, World!
// Print the text of the first <h1>. QuerySelector returns null when nothing
// matches, so the null-conditional access avoids a NullReferenceException.
var element = document.QuerySelector("h1");
Console.WriteLine(element?.TextContent); // Output: Hello, World!

// Print the text of every <p> element. These statements must sit on their own
// lines: on a single line the comment above would swallow them.
var elements = document.QuerySelectorAll("p");
foreach (var p in elements)
{
    Console.WriteLine(p.TextContent);
}
using AngleSharp;
using AngleSharp.Html.Parser;

// Download a page and print the raw href attribute of every anchor that has one.
var url = "https://example.com";

// The default loader is what allows the context to issue the HTTP request.
var context = BrowsingContext.New(Configuration.Default.WithDefaultLoader());
var document = await context.OpenAsync(url);

foreach (var anchor in document.QuerySelectorAll("a[href]"))
{
    Console.WriteLine(anchor.GetAttribute("href"));
}
// Print the text of each <p> that is a direct child of a div with class "container".
foreach (var paragraph in document.QuerySelectorAll("div.container > p"))
{
    Console.WriteLine(paragraph.TextContent);
}
// Print the href of every anchor nested anywhere inside the first div.container.
// NOTE(review): the original called container.Traverse(...), which is not a
// documented AngleSharp API; a scoped selector query is used to visit the
// descendant <a> elements instead - confirm no custom Traverse extension was intended.
var container = document.QuerySelector("div.container");

// QuerySelector returns null when the page has no matching container.
if (container is not null)
{
    foreach (var anchor in container.QuerySelectorAll("a"))
    {
        // Hoisted into a local: quotes nested inside an interpolation hole
        // only compile on C# 11 and later.
        var href = anchor.GetAttribute("href");
        Console.WriteLine($"Found link: {href}");
    }
}
// Load a page with scripting enabled and print the text of #dynamic-content.
// WithJs() is provided by the separate AngleSharp.Js package.
var config = Configuration.Default
    .WithDefaultLoader() // without a loader the context cannot fetch the URL
    .WithJs();
var context = BrowsingContext.New(config);
var url = "https://example.com";

// Wait for the page to finish loading.
// NOTE(review): the original awaited document.WaitForReadyStateAsync(), which is
// not a documented AngleSharp API; WhenStable() is the helper shown in the
// AngleSharp.Js README - confirm against the package version in use.
var document = await context.OpenAsync(url).WhenStable();

// The element may be absent, so avoid dereferencing a null query result.
var dynamicContent = document.QuerySelector("#dynamic-content")?.TextContent;
Console.WriteLine(dynamicContent);
// Fill in and submit the login form, then print the markup of the resulting page.
// Assumes `using AngleSharp.Dom;` and `using AngleSharp.Html.Dom;` are in scope.
// The typed query is needed because a plain IElement exposes no form members.
var form = document.QuerySelector<IHtmlFormElement>("form#login-form");

// The query yields null when the page has no matching <form>.
if (form is not null)
{
    // Property names are matched against the names of the form's fields.
    var response = await form.SubmitAsync(new { username = "user", password = "pass" });

    // SubmitAsync resolves to the document produced by the submission;
    // IDocument has no Content member, so print the serialized root element.
    Console.WriteLine(response.DocumentElement?.OuterHtml);
}
// Build a browsing context with CSS support. WithCss() is provided by the
// separate AngleSharp.Css package.
// NOTE(review): the original chained .WithoutJs(), which is not a documented
// AngleSharp extension. Scripting is only active once an engine is registered
// (e.g. via WithJs()), so nothing has to be switched off here - confirm.
var config = Configuration.Default.WithCss();
var context = BrowsingContext.New(config);
// Configure a context whose default loader also loads linked resources.
// NOTE(review): WithLogging is not an extension I can find in AngleSharp's
// documented configuration API - confirm where it is defined before relying on it.
var loaderOptions = new LoaderOptions { IsResourceLoadingEnabled = true };
var config = Configuration.Default
    .WithDefaultLoader(loaderOptions)
    .WithLogging(logger => logger.WriteLine);
var context = BrowsingContext.New(config);
// Load a page with scripting enabled and print the text of #dynamic-content.
// WithJs() is provided by the separate AngleSharp.Js package.
var config = Configuration.Default
    .WithDefaultLoader() // without a loader the context cannot fetch the URL
    .WithJs();
var context = BrowsingContext.New(config);
var url = "https://example.com";

// Wait for the page to finish loading.
// NOTE(review): the original awaited document.WaitForReadyStateAsync(), which is
// not a documented AngleSharp API; WhenStable() is the helper shown in the
// AngleSharp.Js README - confirm against the package version in use.
var document = await context.OpenAsync(url).WhenStable();

// The element may be absent, so avoid dereferencing a null query result.
var dynamicContent = document.QuerySelector("#dynamic-content")?.TextContent;
Console.WriteLine(dynamicContent);
// Load a page with scripting enabled and run a script that attaches a click
// handler to the first <button>. WithJs() and ExecuteScript() are provided by
// the separate AngleSharp.Js package.
var script = @"function onButtonClick() { console.log('Button clicked!'); } document.querySelector('button').addEventListener('click', onButtonClick);";

var config = Configuration.Default
    .WithDefaultLoader() // without a loader the context cannot fetch the URL
    .WithJs();
var context = BrowsingContext.New(config);
var url = "https://example.com";
var document = await context.OpenAsync(url);

// NOTE(review): the original awaited document.ExecuteScriptAsync(script), which is
// not a documented API; AngleSharp.Js documents the synchronous ExecuteScript -
// confirm against the package version in use.
// NOTE(review): the script itself fails if the page has no <button>, because
// querySelector then returns null.
document.ExecuteScript(script);
// Fetch a page and report failures by category.
// Assumes `using System.Net.Http;` and `using AngleSharp.Html.Parser;` are in scope.
try
{
    var config = Configuration.Default.WithDefaultLoader();
    var context = BrowsingContext.New(config);
    var url = "https://example.com";
    var document = await context.OpenAsync(url);

    // Process the fetched document here. This comment must end its own line:
    // on a single line it swallowed the closing brace and every catch clause.
}
catch (HttpRequestException ex)
{
    Console.WriteLine($"HTTP 请求失败: {ex.Message}");
}
// NOTE(review): the original caught ParseException, which is not an AngleSharp
// type; HtmlParseException is the HTML parser's exception type - confirm.
catch (HtmlParseException ex)
{
    Console.WriteLine($"HTML 解析失败: {ex.Message}");
}
catch (Exception ex)
{
    // Last-resort handler for anything not covered above.
    Console.WriteLine($"发生未知错误: {ex.Message}");
}
// Configure a context whose default loader also loads linked resources.
// NOTE(review): WithLogging is not an extension I can find in AngleSharp's
// documented configuration API - confirm where it is defined before relying on it.
var loaderOptions = new LoaderOptions { IsResourceLoadingEnabled = true };
var config = Configuration.Default
    .WithDefaultLoader(loaderOptions)
    .WithLogging(logger => logger.WriteLine);
var context = BrowsingContext.New(config);
// Fetch a page, retrying a few times with a fixed delay when the load fails.
// LoaderOptions has no MaxRetries / RetryDelay members, so the retry policy is
// implemented explicitly with the same values the original tried to configure.
const int maxRetries = 3;
var retryDelay = TimeSpan.FromSeconds(5);

var loaderOptions = new LoaderOptions { IsResourceLoadingEnabled = true };
var config = Configuration.Default.WithDefaultLoader(loaderOptions);
var context = BrowsingContext.New(config);
var url = "https://example.com";

var document = await context.OpenAsync(url);

// NOTE(review): presumably a failed load surfaces as a non-2xx StatusCode on the
// returned document rather than as an exception - confirm for the requester in use.
for (var retry = 0; retry < maxRetries && (int)document.StatusCode is < 200 or >= 300; retry++)
{
    await Task.Delay(retryDelay);
    document = await context.OpenAsync(url);
}