思路描述:请求 DHL 网页跟踪页面,主要使用正则表达式解析返回的 HTML。
返回一个跟踪步骤列表,每个步骤包含时间(time)和内容描述(context)。
/// <summary>
/// One step of a shipment's tracking timeline.
/// </summary>
public class TrackingData
{
    /// <summary>
    /// When the step happened, as display text (date part, a space, then time part).
    /// </summary>
    public string time { get; set; }

    /// <summary>
    /// Free-text details of the step (location and/or description fragments).
    /// </summary>
    public string context { get; set; }
}
/// <summary>
/// Fetches the DHL Global Mail web-tracking page for a tracking number and
/// extracts the shipment timeline from its HTML using regular expressions.
/// </summary>
public class DHLExpressTrackingHelper
{
    // Tracking page address; {0} receives the URL-escaped tracking number.
    private const string UrlFormat = "http://webtrack.dhlglobalmail.com/?trackingnumber={0}";

    // The patterns below never use ^ or $, so RegexOptions.Multiline would have
    // no effect; Singleline is what lets '.' run across line breaks in the HTML.
    // Regex instances are immutable, so they are built once and shared.

    // Captures the inner HTML of the first <ol> element (the timeline list).
    private static readonly Regex OrderedListRegex = new Regex(
        "<ol.*?>(?<olvalue>.*?)</ol>",
        RegexOptions.Singleline);

    // Captures the class attribute and inner HTML of each <li> in the timeline.
    private static readonly Regex ListItemRegex = new Regex(
        "<li.*? class=\"(?<className>.*?)\".*?>(?<content>.*?)</li>",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // Captures the class attribute and inner HTML of each <div> inside an event item.
    private static readonly Regex EventDivRegex = new Regex(
        "<div.*?class=\"(?<divClassName>.*?)\".*?>(?<divContent>.*?)</div>",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // Matches any markup tag so it can be stripped from the time text.
    private static readonly Regex HtmlTagRegex = new Regex("<[^<]*>");

    /// <summary>
    /// Downloads the tracking page for <paramref name="trackCode"/> and parses
    /// its timeline into tracking steps.
    /// </summary>
    /// <param name="trackCode">The tracking number to look up, for example "A150529020091".</param>
    /// <returns>
    /// The parsed steps in page order. The list is empty (never null) when the
    /// tracking number is blank, the page could not be read, or the page
    /// contains no <c>&lt;ol&gt;</c> timeline.
    /// </returns>
    public static List<TrackingData> GetTrackingData(string trackCode)
    {
        // A blank tracking number cannot match a shipment; skip the request.
        if (string.IsNullOrWhiteSpace(trackCode))
        {
            return new List<TrackingData>();
        }

        WebMocker dhlMocker = new WebMocker();

        // Escape the value so characters such as '&', '#' or spaces cannot
        // corrupt the query string.
        var url = string.Format(UrlFormat, System.Uri.EscapeDataString(trackCode));
        var html = dhlMocker.Get(url);

        // Regex.Match throws on null input, so treat a missing body as "no data".
        if (string.IsNullOrEmpty(html))
        {
            return new List<TrackingData>();
        }

        var match = OrderedListRegex.Match(html);
        if (!match.Success)
        {
            return new List<TrackingData>();
        }

        return ParseTrackingData(match.Groups["olvalue"].Value);
    }

    /// <summary>
    /// Walks the <c>&lt;li&gt;</c> items of the timeline list. An item whose class is
    /// exactly "timeline-date" sets the date used for the events that follow it;
    /// an item whose class contains "timeline-event" becomes one tracking step.
    /// </summary>
    /// <param name="olValue">Inner HTML of the timeline <c>&lt;ol&gt;</c> element.</param>
    /// <returns>The tracking steps found, in document order; contains no null entries.</returns>
    private static List<TrackingData> ParseTrackingData(string olValue)
    {
        var steps = new List<TrackingData>();
        string datePrefix = null;

        foreach (Match item in ListItemRegex.Matches(olValue))
        {
            var className = item.Groups["className"].Value;
            var content = item.Groups["content"].Value;

            if (className == "timeline-date")
            {
                datePrefix = content;
            }
            else if (className.Contains("timeline-event"))
            {
                var step = ParseEvent(content, datePrefix);

                // Events with no recognised <div> yield nothing; do not hand
                // callers a null list entry.
                if (step != null)
                {
                    steps.Add(step);
                }
            }
        }

        return steps;
    }

    /// <summary>
    /// Builds one tracking step from the inner HTML of a "timeline-event" item.
    /// </summary>
    /// <param name="eventHtml">Inner HTML of the event <c>&lt;li&gt;</c>.</param>
    /// <param name="datePrefix">
    /// Content of the most recent "timeline-date" item, or null if none was seen yet.
    /// </param>
    /// <returns>
    /// The step, or null when the event has no "timeline-time", "timeline-location"
    /// or "timeline-description" div.
    /// </returns>
    private static TrackingData ParseEvent(string eventHtml, string datePrefix)
    {
        TrackingData step = null;

        foreach (Match div in EventDivRegex.Matches(eventHtml))
        {
            var divClassName = div.Groups["divClassName"].Value.Trim();
            var divContent = div.Groups["divContent"].Value.Trim();

            switch (divClassName)
            {
                case "timeline-time":
                    // The step is created on the first recognised div, whatever
                    // its kind, so a location/description that precedes the time
                    // div no longer dereferences null.
                    if (step == null)
                    {
                        step = new TrackingData();
                    }

                    // The time text may be wrapped in inline markup; keep the text only.
                    step.time = datePrefix + " " + HtmlTagRegex.Replace(divContent, "");
                    break;

                case "timeline-location":
                case "timeline-description":
                    if (step == null)
                    {
                        step = new TrackingData();
                    }

                    // Fragments are joined with a trailing space, in page order.
                    step.context += divContent + " ";
                    break;

                // Any other class (e.g. the responsive duplicate of the location
                // div, "timeline-location timeline-location-responsive") is ignored.
            }
        }

        return step;
    }
}