Untitled diff
35 removals
60 lines
22 additions
52 lines
const algorithmia = require('algorithmia')
import algorithmia from 'algorithmia';
const algorithmiaApiKey = require('../credentials/algorithmia.json').apiKey
const algorithmiaApiKey = require('../credentials/algorithmia.json').apiKey
const sentenceBoundaryDetection = require('sbd')
import sentenceBoundaryDetection from 'sbd';
import { IContent } from './robots.api';
/**
 * Text robot: downloads the Wikipedia article for `content.searchTerm`,
 * cleans it up and splits it into sentences.
 *
 * The `content` object is mutated in place; on completion it carries:
 *  - `sourceContentOriginal`  – article text as returned by the parser,
 *  - `sourceContentSanitized` – the text without blank lines, lines that
 *    start with `=`, and parenthesized fragments,
 *  - `sentences`              – one `{ text, keywords, images }` entry per
 *    detected sentence (`keywords` and `images` start empty).
 *
 * @param content Shared content object; `searchTerm` must be set.
 * @returns A promise that resolves (with no value) once `content` is filled.
 */
async function robot(content: IContent): Promise<void> {
  await fetchContentFromWikipedia(content)
  sanitizeContent(content)
  breakContentIntoSentences(content)

  /** Runs the Algorithmia Wikipedia parser and stores the article text. */
  async function fetchContentFromWikipedia(content: IContent): Promise<void> {
    const algorithmiaAuthenticated = algorithmia(algorithmiaApiKey)
    const wikipediaAlgorithm = algorithmiaAuthenticated.algo('web/WikipediaParser/0.1.2')
    const wikipediaResponse = await wikipediaAlgorithm.pipe(content.searchTerm)

    // The article text is read from the result's `content` field. Destructuring
    // a `wikipediaContent` field instead would leave `sourceContentOriginal`
    // undefined, and the sanitizing step would then fail on `undefined.split`.
    const wikipediaContent = wikipediaResponse.get()
    content.sourceContentOriginal = wikipediaContent.content
  }

  /** Derives `sourceContentSanitized` from `sourceContentOriginal`. */
  function sanitizeContent(content: IContent): void {
    const withoutBlankLinesAndMarkdown = removeBlankLinesAndMarkdown(content.sourceContentOriginal)
    const withoutDatesInParentheses = removeDatesInParentheses(withoutBlankLinesAndMarkdown)

    content.sourceContentSanitized = withoutDatesInParentheses

    /**
     * Drops lines that are empty after trimming or that start with `=`
     * (presumably wiki section headings) and joins the remaining lines,
     * untrimmed, with a single space.
     */
    function removeBlankLinesAndMarkdown(text: string): string {
      const isContentLine = (line: string): boolean => {
        const trimmed = line.trim()
        return trimmed.length > 0 && !trimmed.startsWith('=')
      }

      return text.split('\n').filter(isContentLine).join(' ')
    }
  }

  /**
   * Removes every parenthesized fragment (one level of nested parentheses is
   * supported), not only dates, then collapses the runs of spaces that the
   * removal leaves behind into a single space.
   */
  function removeDatesInParentheses(text: string): string {
    return text
      .replace(/\((?:\([^()]*\)|[^()])*\)/gm, '')
      .replace(/ {2,}/g, ' ')
  }

  /** Splits the sanitized text into sentence entries on `content.sentences`. */
  function breakContentIntoSentences(content: IContent): void {
    const sentences = sentenceBoundaryDetection.sentences(content.sourceContentSanitized)

    content.sentences = sentences.map((text: string) => ({
      text,
      keywords: [],
      images: []
    }))
  }
}
// Expose the text robot as this module's single export.
module.exports = robot