-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #94 from michaelmcmillan/studieweb.net
Studieweb.net supported.
- Loading branch information
Showing
6 changed files
with
181 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
var config = require('../../config.js'); | ||
var logger = require('../../log/logger.js'); | ||
var Studieweb = require('../../parsers/studieweb/studieweb.js'); | ||
var QueryFactory = require('../../database/factories/query.js'); | ||
var WebsiteFactory = require('../../database/factories/website.js'); | ||
var ListFactory = require('../../database/factories/list.js'); | ||
var ResultController = require('./result.js'); | ||
|
||
/**
 * Request handler that resolves a Studieweb.net lookup.
 *
 * Reads the lookup string from `req.query.q`, asks QueryFactory for websites
 * already stored under that string and, when none are found, lets the
 * Studieweb parser fetch the page, persists the parsed website through
 * WebsiteFactory and hands the stored record to ResultController.
 *
 * @param {Object}   req  Incoming request; `req.query.q` holds the lookup string.
 * @param {Object}   res  Response object, forwarded untouched to ResultController.
 * @param {Function} next Called with an Error as soon as any step fails.
 */
function StudiewebController (req, res, next) {
    var queryString = req.query.q;

    // Reject a missing, empty or non-string (e.g. repeated ?q=a&q=b) parameter
    // up front instead of letting it reach the cache lookup and the HTTP fetch.
    if (typeof queryString !== 'string' || queryString.length === 0)
        return next(new Error('Missing query parameter "q"'));

    QueryFactory.read(queryString, 'website', function (err, cachedWebsites) {
        if (err) return next(err);

        // If the cache returned websites there is no need to ask Studieweb
        if (cachedWebsites && cachedWebsites.length > 0) {
            logger.log('debug', 'Found Studieweb cache with %d website for "%s"',
                cachedWebsites.length, queryString);

            // NOTE(review): the boolean presumably tells ResultController
            // whether the result still has to be cached - confirm in result.js
            ResultController(cachedWebsites, false, req, res, next);
            return;
        }

        // Empty cache means we ask Studieweb; the parser is only needed here
        var studieweb = new Studieweb();
        logger.profile('Studieweb query');

        studieweb.search(queryString, function (err, website) {
            // Issue the second profile call before the error check so it is
            // also made when the lookup fails.
            // NOTE(review): assumes logger.profile toggles a named timer
            // (winston style) - confirm in log/logger.js
            logger.profile('Studieweb query');
            if (err) return next(err);

            logger.log('debug', 'Studieweb returned result');

            // Store the parsed website
            WebsiteFactory.create(website, function (err, createdWebsite) {
                if (err) return next(err);
                logger.log('debug', 'Created website in the database');

                // Hand over the fresh result (flag set to true, see note above)
                ResultController([createdWebsite], true, req, res, next);
            });
        });
    });
}
|
||
module.exports = StudiewebController; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
var config = require('../../config.js'); | ||
var URLParser = require('url'); | ||
var querystring = require('querystring'); | ||
var request = require('request'); | ||
var cheerio = require('cheerio'); | ||
var moment = require('moment'); | ||
var Website = require('../../models/website.js'); | ||
var Author = require('../../models/author.js'); | ||
|
||
/**
 * Crawler/parser for pages on Studieweb.net.
 *
 * Exposes three methods on the instance:
 *  - isStudiewebURL(url): does the string mention studieweb.net?
 *  - parse(html): build a Website from a page's og:title meta tag
 *  - search(url, done): fetch the url and hand the parsed Website to `done`
 */
function Studieweb () {

    var self = this;
    var userAgent = config.crawlers.useragent;

    /**
     * Builds the options for a single request. A fresh object is created per
     * call so searches never share (or overwrite) each other's `url`.
     */
    function buildRequestOptions (url) {
        return {
            url: url,
            // request only sends headers listed under `headers`
            headers: { 'User-Agent': userAgent },
            followAllRedirects: true,
            maxRedirects: 2,
            // with request, a null encoding delivers the body as a Buffer
            encoding: null
        };
    }

    /**
     * @param  {*} url Candidate url, with or without protocol.
     * @return {boolean} true when `url` is a string containing "studieweb.net"
     *                   (compared case-insensitively); false for anything else.
     */
    this.isStudiewebURL = function (url) {
        if (typeof url !== 'string') return false;
        return url.toLowerCase().indexOf('studieweb.net') !== -1;
    };

    /**
     * Builds a Website from the html of a Studieweb page.
     *
     * @param  {string|Buffer} nodeHTML Markup passed on to cheerio.load.
     * @return {Website} Website whose title is set from og:title; no title is
     *                   set when the page has no og:title content.
     */
    this.parse = function (nodeHTML) {

        var node = new Website();
        var $ = cheerio.load(nodeHTML);

        // The title is taken from the opengraph meta tag
        var title = $('meta[property="og:title"]').attr('content');

        if (title !== undefined) {

            // Capitalize the first char of title
            title = title.charAt(0).toUpperCase() + title.slice(1);

            // Remove the " - Studieweb.net" suffix, but only when it really
            // sits at the very end of the title
            var suffix = ' - Studieweb.net';
            if (title.indexOf(suffix, title.length - suffix.length) !== -1)
                title = title.substring(0, title.length - suffix.length);

            node.setTitle(title);
        }

        // Finally return the constructed node (or website if you will)
        return node;
    };

    /**
     * Fetches `url` and parses the response into a Website.
     *
     * @param {string}   url  Address of the Studieweb page.
     * @param {Function} done Called as done(err) on a request error, on a
     *                        404/501 response or when parsing throws;
     *                        otherwise as done(undefined, website).
     */
    this.search = function (url, done) {
        request.get(buildRequestOptions(url), function (err, response, data) {
            if (err) return done(err);
            if ([404, 501].indexOf(response.statusCode) !== -1)
                return done(new Error('Siden finnes ikke på Studieweb.net'));

            var website;
            try {
                website = self.parse(data);

                // Set the url to be the provided url
                website.setURL(url);
            } catch (parseError) {
                // Report parser failures through the callback instead of
                // throwing from inside the request callback
                return done(parseError);
            }

            done(undefined, website);
        });
    };
}
|
||
module.exports = Studieweb; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
var assert = require('assert'); | ||
var Studieweb = require('../../parsers/studieweb/studieweb.js'); | ||
|
||
// Unit tests for the Studieweb parser: url detection and title extraction.
// Only isStudiewebURL and parse are exercised, so no network access happens.
describe('Studieweb', function () {

    it('should pass if url is from studieweb', function () {
        var studieweb = new Studieweb();
        assert.strictEqual(studieweb.isStudiewebURL('http://studieweb.net/hva-er-metodelaere/'), true);
    });

    it('should not pass if url is not from studieweb', function () {
        var studieweb = new Studieweb();
        assert.strictEqual(studieweb.isStudiewebURL('http://snl.no'), false);
    });

    // The domain under test is studieweb.net (the old title said ".no")
    it('should pass if url is from studieweb.net without protocol', function () {
        var studieweb = new Studieweb();
        assert.strictEqual(studieweb.isStudiewebURL('studieweb.net/sosiologi-og-sosialantropologi/'), true);
    });

    it('should extract title from the html of a resource', function () {
        var studieweb = new Studieweb();
        var node = studieweb.parse('<meta property="og:title" content="Sosiologi og sosialantropologi" />');
        assert.strictEqual(node.getTitle(), 'Sosiologi og sosialantropologi');
    });

    it('should format the title with capital letter', function () {
        var studieweb = new Studieweb();
        var node = studieweb.parse('<meta property="og:title" content="sosiologi og sosialantropologi" />');
        assert.strictEqual(node.getTitle(), 'Sosiologi og sosialantropologi');
    });

    it('should filter out the suffix "- Studieweb.net" from the title tag', function () {
        var studieweb = new Studieweb();
        var node = studieweb.parse('<meta property="og:title" content="Sosiologi og sosialantropologi - Studieweb.net" />');
        assert.strictEqual(node.getTitle(), 'Sosiologi og sosialantropologi');
    });

    it('should return undefined when calling .getTitle when no title is set', function () {
        var studieweb = new Studieweb();
        var node = studieweb.parse('<meta property="og:non-title" content="Sosiologi og sosialantropologi - Studieweb.net" />');
        // Kept as a loose comparison: what the Website model returns for an
        // unset title (undefined vs. null) is not visible from this file
        assert.equal(node.getTitle(), undefined);
    });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters