Extraia dados do XPath por meio do Google Apps Script

Você pode obter (extrair) dados de um URL via Google Apps Script: basta passar o XPath e o URL a ser monitorado.

Eu configurei um script que monitora continuamente o post principal do YCombinator; quando ele muda, eu recebo um e-mail com o link.

/**
 * Fetches `url`, parses the response leniently and returns the text and link
 * of the element addressed by a simple absolute XPath.
 *
 * Only plain child steps separated by "/" are supported, each optionally
 * followed by a 1-based numeric index, e.g. "/html/body/table/tr[3]/td/a".
 * The leading "/html/" is dropped because the walk starts at the document's
 * root element. Every "tbody" step is dropped as well (per the original
 * author's note, Apps Script does not understand it).
 *
 * @param {string} path Absolute XPath of the target element.
 * @param {string} url  Address of the page to fetch.
 * @return {string} The element text followed by its href, formatted as
 *     "text [ href ] ". The href part is empty when the element has no
 *     "href" attribute.
 * @throws {Error} If an index is not a positive integer or a step of the path
 *     does not resolve to an element.
 */
function getDataFromXpath(path, url) {
  var data = UrlFetchApp.fetch(url);
  var text = data.getContentText();
  var xmlDoc = Xml.parse(text, true);

  // A regex with the g flag is required to strip *every* tbody step; the
  // three-argument form of String.replace is non-standard and is ignored by
  // standard engines, which would leave all but the first tbody in place.
  var steps = path
    .replace("/html/", "")
    .replace(/\/tbody(?:\[\d+\])?(?=\/|$)/g, "");
  var tags = steps.split("/");
  Logger.log("tags : " + tags);

  // Start the walk at the root element of the parsed document.
  var element = xmlDoc.getElement();

  for (var i = 0; i < tags.length; i++) {
    var tag = tags[i];
    if (tag === "") {
      // Empty segments come from leading/trailing/double slashes.
      continue;
    }
    Logger.log("Tag : " + tag);

    var index = tag.indexOf("[");
    if (index !== -1) {
      // Parse the whole number up to "]" so multi-digit indexes such as
      // tr[12] work; XPath indexes are 1-based.
      var val = parseInt(tag.substring(index + 1), 10);
      if (isNaN(val) || val < 1) {
        throw new Error('Invalid index in step "' + tag + '" of path "' + path + '"');
      }
      element = element.getElements(tag.substring(0, index))[val - 1];
    } else {
      element = element.getElement(tag);
    }

    if (!element) {
      throw new Error('No element found for step "' + tag + '" of path "' + path + '"');
    }
  }

  var href = element.getAttribute("href");
  var link = href ? href.getValue() : "";
  return element.getText() + ' [ ' + link + ' ] ';
}


/**
 * Checks the top entry of the Hacker News front page and mails it when it
 * differs from the value remembered in the script's private cache.
 *
 * The last seen value is stored in the cache under the page URL for 3666
 * seconds. When the cache is empty or holds a different value, a mail is sent
 * and the cache is refreshed with the new value.
 */
function checkUpdateAndSendEmail() {
  var cache = CacheService.getPrivateCache();

  var url = "https://news.ycombinator.com/news";
  var path = "/html/body/center/table/tbody/tr[3]/td/table/tbody/tr/td[3]/a";
  var cached = cache.get(url);
  var text = getDataFromXpath(path, url);

  // `text` is always a string, so this also covers the empty-cache case
  // (cache.get yields null when nothing is stored).
  if (cached !== text) {
    // Send first: if sending throws, the cache keeps the old value and the
    // next run retries instead of silently dropping the notification.
    MailApp.sendEmail("<email>", "YCombinator Top", text);
    // Remember the *new* value; storing the old one would make every run
    // look like a change and send a mail each time.
    cache.put(url, text, 3666);
    Logger.log("Mail Sent!!! ");
  }
  Logger.log("text : " + text);
  Logger.log("cached : " + cached);
}

https://gist.github.com/vs4vijay/6724868