/// <summary>
/// Entry point: builds the configuration and the browsing context ("browser").
/// </summary>
void Main()
{
    // Build the configuration the browser will use.
    var browserConfig = AngleSharp.Configuration.Default.WithDefaultLoader();

    // Create our browser from that configuration.
    var context = BrowsingContext.New(browserConfig);
}
// For convenience, Main is switched to its asynchronous form here.
/// <summary>
/// Entry point: opens the board index page and dumps the loaded document.
/// </summary>
async Task Main()
{
    var context = BrowsingContext.New(
        AngleSharp.Configuration.Default.WithDefaultLoader());

    // The Url type used here is the one provided by AngleSharp (AngleSharp.Dom.Url).
    var boardIndex = new Url("https://www.ptt.cc/bbs/Beauty/index.html");

    // OpenAsync opens the page and brings its content back.
    var page = await context.OpenAsync(boardIndex);
    page.Dump();
}
// Start from the default configuration with the default loader...
var config = AngleSharp.Configuration.Default.WithDefaultLoader();

// ...then call `WithDefaultCookies()` to add the default cookies.
config = config.WithDefaultCookies();

var browser = BrowsingContext.New(config);
var url = new Url("https://www.ptt.cc/bbs/Beauty/index.html");

// Add the "already over eighteen" cookie so the age-verification page is passed.
// The cookie string is a plain name=value pair; it must not contain any
// stray quote character after the value.
browser.SetCookie(url, "over18=1");

var document = await browser.OpenAsync(url);
/// <summary>
/// Entry point: opens the board index page with the age-verification cookie set
/// and dumps the HTML of the document body.
/// </summary>
async Task Main()
{
    var config = AngleSharp.Configuration.Default
        .WithDefaultLoader()
        .WithDefaultCookies();
    var browser = BrowsingContext.New(config);

    var url = new Url("https://www.ptt.cc/bbs/Beauty/index.html");

    // Plain name=value cookie; the literal must not carry a trailing quote
    // character after the value.
    browser.SetCookie(url, "over18=1");

    var document = await browser.OpenAsync(url);
    document.Body.InnerHtml.Dump();
}
// Loader options with resource loading switched on.
var loaderOptions = new LoaderOptions { IsResourceLoadingEnabled = true };

// Default configuration + loader using those options + WithCss().
Configuration.Default
    .WithDefaultLoader(loaderOptions)
    .WithCss();
// Select every div whose class is r-ent and dump each one's inner HTML
// directly, just to have a look at the content.
(from node in document.QuerySelectorAll("div.r-ent")
 select node.InnerHtml)
    .Dump();
/// <summary>
/// One article entry read from a board index page.
/// </summary>
public class Post
{
    /// <summary>Title text of the article.</summary>
    public string Title { get; set; }

    /// <summary>Push count of the article.</summary>
    public int Push { get; set; }

    /// <summary>Link (href) to the article.</summary>
    public string Link { get; set; }
}
// The anchor inside the title cell; QuerySelector yields null when there is none.
var titleElement = post.QuerySelector("div.title > a");

// Title text (null when the anchor is missing).
string title = titleElement != null ? titleElement.InnerHtml : null;

// Article link (null when the anchor is missing).
string link = titleElement != null ? titleElement.GetAttribute("href") : null;
// Raw text of the push counter cell (null when the span is absent).
var pushString = post.QuerySelector("div.nrec > span")?.InnerHtml;

// "爆" counts as 100; a parsable number is used as-is; anything else is 0.
int pushCount;
if (pushString == "爆")
{
    pushCount = 100;
}
else if (Int16.TryParse(pushString, out var push))
{
    pushCount = push;
}
else
{
    pushCount = 0;
}
// Project every entry element to a Post, drop entries without a title,
// and dump the result.
var posts = postSource
    .Select(post =>
    {
        var anchor = post.QuerySelector("div.title > a");
        string titleText = anchor != null ? anchor.InnerHtml : null;
        string href = anchor != null ? anchor.GetAttribute("href") : null;

        var pushText = post.QuerySelector("div.nrec > span")?.InnerHtml;

        // "爆" counts as 100; a parsable number is used as-is; anything else is 0.
        int pushes;
        if (pushText == "爆")
        {
            pushes = 100;
        }
        else if (Int16.TryParse(pushText, out var parsed))
        {
            pushes = parsed;
        }
        else
        {
            pushes = 0;
        }

        return new Post
        {
            Title = titleText,
            Link = href,
            Push = pushes
        };
    })
    .Where(post => post.Title != null)
    .Dump();
// Read the href of the second anchor in the paging button group.
// QuerySelector returns null when nothing matches, so the attribute is read
// null-conditionally instead of dereferencing the result directly.
var nextPageLink = document
    .QuerySelector("div.btn-group.btn-group-paging > a:nth-child(2)")
    ?.GetAttribute("href");

nextPageLink.Dump(); // e.g. /bbs/Beauty/index3980.html
/// <summary>
/// Signature skeleton of the post-collecting method; the body is left empty here.
/// </summary>
/// <param name="browser">Browsing context passed in by the caller.</param>
/// <param name="baseUrl">Base part of the address (NOTE(review): presumably scheme + host — confirm against the caller).</param>
/// <param name="pageUrl">Page part of the address (NOTE(review): presumably a path appended to <paramref name="baseUrl"/> — confirm).</param>
/// <param name="remainingPages">Number of pages still to process (NOTE(review): inferred from the name — confirm).</param>
/// <returns>A task producing a sequence of <c>Post</c>.</returns>
private async Task<IEnumerable<Post>> GetPosts( IBrowsingContext browser, string baseUrl, string pageUrl, int remainingPages) { }
/// <summary>
/// Reads the posts listed on one index page and then recurses into the page
/// linked by the paging button, until the requested number of pages is read.
/// </summary>
/// <param name="browser">Browsing context used to set the cookie and open each page.</param>
/// <param name="baseUrl">Prefix concatenated in front of every page path.</param>
/// <param name="pageUrl">Path of the page to read on this call.</param>
/// <param name="remainingPages">
/// Number of pages still to read, including this one. A value of zero or less
/// reads nothing.
/// </param>
/// <returns>
/// The posts of this page followed by the posts of the following pages.
/// Entries without a title anchor are skipped.
/// </returns>
private async Task<IEnumerable<Post>> GetPosts(
    IBrowsingContext browser,
    string baseUrl,
    string pageUrl,
    int remainingPages)
{
    // Guard: with a non-positive count the countdown below would never hit
    // its stop condition, so stop before doing any work.
    if (remainingPages <= 0)
    {
        return Enumerable.Empty<Post>();
    }

    // Assemble the Url and set the age-verification cookie (plain name=value).
    var url = new Url(baseUrl + pageUrl);
    browser.SetCookie(url, "over18=1");
    var document = await browser.OpenAsync(url);

    // Extract every post entry. The query is materialized with ToList() so the
    // DOM is read exactly once, before Close() is called on the document, and
    // callers do not re-run the projection on every enumeration.
    var posts = document
        .QuerySelectorAll("div.r-ent")
        .Select(post =>
        {
            var titleElement = post.QuerySelector("div.title > a");
            var title = titleElement?.InnerHtml;
            var link = titleElement?.GetAttribute("href");

            // "爆" counts as 100; a parsable number is used as-is; anything else is 0.
            var pushString = post.QuerySelector("div.nrec > span")?.InnerHtml;
            var pushCount =
                pushString == "爆" ? 100 :
                Int16.TryParse(pushString, out var push) ? push : 0;

            return new Post
            {
                Title = title,
                Link = link,
                Push = pushCount
            };
        })
        .Where(post => post.Title != null)
        .ToList();

    // Link to the page to read next; null when the anchor or its href is missing.
    var nextPageUrl = document
        .QuerySelector("div.btn-group.btn-group-paging > a:nth-child(2)")
        ?.GetAttribute("href");

    document.Close();

    // Stop when this was the last requested page or there is nowhere to go.
    if (remainingPages == 1 || string.IsNullOrEmpty(nextPageUrl))
    {
        return posts;
    }

    // Append the posts gathered recursively from the following pages.
    var nextPagePosts = await GetPosts(browser, baseUrl, nextPageUrl, remainingPages - 1);
    return posts.Concat(nextPagePosts);
}
/// <summary>
/// Entry point: creates the browsing context and collects posts from ten
/// index pages, starting at the board's index page.
/// </summary>
async Task Main()
{
    const string baseUrl = "https://www.ptt.cc";
    const string indexUrl = "/bbs/Beauty/index.html";
    const int pages = 10;

    var browserConfig = AngleSharp.Configuration.Default
        .WithDefaultLoader()
        .WithDefaultCookies();
    var context = BrowsingContext.New(browserConfig);

    var posts = await GetPosts(context, baseUrl, indexUrl, pages);
}
// Keep only the posts whose push count is above 90 and dump them.
(from post in posts
 where post.Push > 90
 select post)
    .Dump();
LineNotify 做個推播通知啦,還是乾脆掛到排程服務去定時爬資料啦,都是很彈性很自由的了。