Comparing sensitive data, confidential files or internal emails?

Most legal and privacy policies prohibit uploading sensitive data online. Diffchecker Desktop ensures your confidential information never leaves your computer. Work offline and compare documents securely.

Untitled diff

Created Diff never expires
35 removals
60 lines
22 additions
52 lines
const algorithmia = require('algorithmia')
import algorithmia from 'algorithmia';
const algorithmiaApiKey = require('../credentials/algorithmia.json').apiKey
const algorithmiaApiKey = require('../credentials/algorithmia.json').apiKey
const sentenceBoundaryDetection = require('sbd')
import sentenceBoundaryDetection from 'sbd';
import { IContent } from './robots.api';


async function robot(content) {
async function robot(content: IContent) {
await fetchContentFromWikipedia(content)
await fetchContentFromWikipedia(content)
sanitizeContent(content)
sanitizeContent(content)
breakContentIntoSentences(content)
breakContentIntoSentences(content)


async function fetchContentFromWikipedia(content) {
async function fetchContentFromWikipedia(content: IContent) {
const algorithmiaAuthenticated = algorithmia(algorithmiaApiKey)
const algorithmiaAuthenticated = algorithmia(algorithmiaApiKey)
const wikipediaAlgorithm = algorithmiaAuthenticated.algo('web/WikipediaParser/0.1.2')
const wikipediaAlgorithm = algorithmiaAuthenticated.algo('web/WikipediaParser/0.1.2')
const wikipediaResponde = await wikipediaAlgorithm.pipe(content.searchTerm)
const wikipediaResponde = await wikipediaAlgorithm.pipe(content.searchTerm)
const wikipediaContent = wikipediaResponde.get()

const { wikipediaContent } = wikipediaResponde.get()
content.sourceContentOriginal = wikipediaContent.content
content.sourceContentOriginal = wikipediaContent
}
}


function sanitizeContent(content) {
function sanitizeContent(content: IContent) {
const withoutBlankLinesAndMarkdown = removeBlankLinesAndMarkdown(content.sourceContentOriginal)
const withoutBlankLinesAndMarkdown = removeBlankLinesAndMarkdown(content.sourceContentOriginal)
const withoutDatesInParentheses = removeDatesInParentheses(withoutBlankLinesAndMarkdown)
const withoutDatesInParentheses = removeDatesInParentheses(withoutBlankLinesAndMarkdown)


content.sourceContentSanitized = withoutDatesInParentheses
content.sourceContentSanitized = withoutDatesInParentheses


function removeBlankLinesAndMarkdown(text) {
function removeBlankLinesAndMarkdown(text: string): string {
const allLines = text.split('\n')
const allLines = text.split('\n')


const withoutBlankLinesAndMarkdown = allLines.filter((line) => {
const filterFunc = (line: string) =>
if (line.trim().length === 0 || line.trim().startsWith('=')) {
!(line.trim().length === 0 || line.trim().startsWith('='))
return false
}
return allLines.filter(filterFunc).join(' ')

return true
})

return withoutBlankLinesAndMarkdown.join(' ')
}
}
}
}


function removeDatesInParentheses(text) {
function removeDatesInParentheses(text: string): string {
return text.replace(/\((?:\([^()]*\)|[^()])*\)/gm, '').replace(/ /g,' ')
return text.replace(/\((?:\([^()]*\)|[^()])*\)/gm, '').replace(/ /g,' ')
}
}


function breakContentIntoSentences(content) {
function breakContentIntoSentences(content: IContent) {
content.sentences = []

const sentences = sentenceBoundaryDetection.sentences(content.sourceContentSanitized)
const sentences = sentenceBoundaryDetection.sentences(content.sourceContentSanitized)
sentences.forEach((sentence) => {
content.sentences = sentences.map((text: string) => ({
content.sentences.push({
text,
text: sentence,
keywords: [],
keywords: [],
images: []
images: []
}))
})
})
}
}


}
}


module.exports = robot
module.exports = robot