抓取圖片 HTML tag

   /// <summary>
   /// Collects the full text of every &lt;img ...&gt; tag found in an HTML string.
   /// </summary>
   /// <param name="htmlString">HTML markup to scan.</param>
   /// <returns>
   /// The matched tags, in document order, exactly as they appear in the input
   /// (for example <c>&lt;img src="a.png" /&gt;</c>). Empty when no tag is found.
   /// </returns>
   private List<string> GetImagesInHTMLString(string htmlString)
   {
       // "<img" followed by a word boundary (so "<imgx" is rejected), then
       // everything up to the first '>'.
       const string imgTagPattern = @"<(img)\b[^>]*>";
       var tags = new List<string>();

       // Tag names are matched case-insensitively, so <IMG ...> is included.
       foreach (Match tag in Regex.Matches(htmlString, imgTagPattern, RegexOptions.IgnoreCase))
       {
           tags.Add(tag.Value);
       }

       return tags;
   }

抓取圖片 URL

   /// <summary>
   /// Extracts the <c>src</c> attribute value of every &lt;img&gt; tag in an HTML string.
   /// </summary>
   /// <param name="htmlSource">HTML markup to scan. May be null or empty.</param>
   /// <returns>
   /// The image URLs in document order, with surrounding whitespace trimmed.
   /// Tags without a usable <c>src</c> value are skipped. Empty when
   /// <paramref name="htmlSource"/> is null, empty, or has no matching tag.
   /// </returns>
   /// <remarks>
   /// This is a regular-expression scan, not an HTML parser: a '&gt;' inside an
   /// attribute value, or the text " src=" inside another attribute's value,
   /// can still produce wrong results.
   /// </remarks>
   public List<string> FetchLinksFromSource(string htmlSource)
   {
       List<string> links = new List<string>();
       if (string.IsNullOrEmpty(htmlSource))
       {
           return links;
       }

       // - "<img\b" rejects look-alike tags such as <imgfoo>.
       // - "\ssrc" requires whitespace before the attribute name, so "data-src"
       //   (or any other "*src" attribute) is not mistaken for "src".
       // - The value is either double-quoted, single-quoted, or unquoted. Quoted
       //   values may contain spaces; an unquoted value ends at whitespace or '>'.
       const string imgSrcPattern =
           @"<img\b[^>]*?\ssrc\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s""'>]+))[^>]*>";

       // CultureInvariant keeps case-insensitive matching of "IMG"/"SRC" stable
       // under cultures with special casing rules for 'i' (e.g. Turkish).
       MatchCollection imgMatches = Regex.Matches(
           htmlSource,
           imgSrcPattern,
           RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

       foreach (Match m in imgMatches)
       {
           string url = m.Groups["url"].Value.Trim();
           if (url.Length > 0)
           {
               links.Add(url);
           }
       }

       return links;
   }
arrow
arrow
    全站熱搜

    baechang 發表在 痞客邦 留言(0) 人氣()