Changes
This commit is contained in:
parent
53f3a20b5d
commit
bfc7524d1b
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +1,2 @@
|
||||
node_modules
|
||||
.vscode
|
0
components/rsvp-controls.js
Normal file
0
components/rsvp-controls.js
Normal file
0
components/rsvp-reader.js
Normal file
0
components/rsvp-reader.js
Normal file
51
components/rsvp-word.js
Normal file
51
components/rsvp-word.js
Normal file
@ -0,0 +1,51 @@
|
||||
import { pivotize } from '../src/textProcessing/pivotize.js'
|
||||
|
||||
/**
 * <rsvp-word> custom element.
 *
 * Shows the value of its `word` attribute inside a shadow root, split into
 * three spans (prefix, pivot, suffix). The pivot is coloured red; prefix and
 * suffix share the remaining width equally and the prefix is right-aligned,
 * so the pivot sits in the middle of the element.
 */
class RSVPWord extends HTMLElement {
  constructor() {
    super()
    const shadow = this.attachShadow({ mode: 'open' })
    const style = document.createElement('style')
    const word = document.createElement('div')
    const prefix = document.createElement('span')
    const pivot = document.createElement('span')
    const suffix = document.createElement('span')

    word.setAttribute('class', 'word')
    prefix.setAttribute('class', 'prefix')
    pivot.setAttribute('class', 'pivot')
    suffix.setAttribute('class', 'suffix')

    style.textContent =
      '.word{display:flex}.pivot{color:red}.prefix,.suffix{flex:1}.prefix{text-align:right}'

    word.appendChild(prefix)
    word.appendChild(pivot)
    word.appendChild(suffix)
    shadow.appendChild(style)
    shadow.appendChild(word)

    this._root = shadow
    // Keep direct references so updates do not have to query the shadow DOM.
    this.wordParts = { prefix, pivot, suffix }
  }

  /** Renders the current attribute value once the element is attached. */
  connectedCallback() {
    this.updateDisplay()
  }

  /** Only changes of the `word` attribute trigger attributeChangedCallback. */
  static get observedAttributes() {
    return ['word']
  }

  attributeChangedCallback() {
    this.updateDisplay()
  }

  /**
   * Splits the `word` attribute (empty string when absent) with pivotize()
   * and writes the three parts into their spans.
   */
  updateDisplay() {
    const [prefix, pivot, suffix] = pivotize(this.getAttribute('word') || '')
    // textContent sets the raw text; innerText would additionally apply
    // rendering-aware line-break handling that is not wanted here.
    this.wordParts.prefix.textContent = prefix
    this.wordParts.pivot.textContent = pivot
    this.wordParts.suffix.textContent = suffix
  }
}
|
||||
|
||||
window.customElements.define('rsvp-word', RSVPWord)
|
54
index.html
54
index.html
@ -1,15 +1,47 @@
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<meta http-equiv="X-UA-Compatible" content="ie=edge">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta http-equiv="X-UA-Compatible" content="ie=edge" />
|
||||
<title>Document</title>
|
||||
</head>
|
||||
<style>
|
||||
.marker {
|
||||
text-align: center;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div id="root">
|
||||
<textarea id="input">
|
||||
Lorem ipsum dolor sit amet consectetur adipisicing elit. Error fuga eos odit cupiditate harum quibusdam beatae recusandae aut asperiores, molestiae provident modi quia, atque dicta et earum sunt assumenda inventore.</textarea
|
||||
>
|
||||
<button class="action" action="load" id="load">Load Text</button>
|
||||
<div>
|
||||
Info:
|
||||
</div>
|
||||
<div>
|
||||
<div>
|
||||
<div class="marker">|</div>
|
||||
<rsvp-word id="output"></rsvp-word>
|
||||
<div class="marker">|</div>
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<button class="action" action="prevSentence" id="prevSentence">
|
||||
&lt;&lt;
|
||||
</button>
|
||||
<button class="action" action="prevWord" id="prevWord">&lt;</button>
|
||||
<button class="action" action="nextWord" id="nextWord">></button>
|
||||
<button class="action" action="nextSentence" id="nextSentence">
|
||||
>>
|
||||
</button>
|
||||
</div>
|
||||
<div>
|
||||
<button class="action" action="play-pause" id="play"></button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
<script type="module" src="index.js"></script>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
</body>
|
||||
</html>
|
||||
|
67
index.js
67
index.js
@ -0,0 +1,67 @@
|
||||
import { Chapter } from './src/Chapter.js'
|
||||
import { Player } from './src/Player.js'
|
||||
|
||||
import './components/rsvp-word.js'
|
||||
|
||||
const inputText = document.getElementById('input')
|
||||
const output = document.getElementById('output')
|
||||
|
||||
const prevSentenceButton = document.getElementById('prevSentence')
|
||||
const prevWordButton = document.getElementById('prevWord')
|
||||
const nextWordButton = document.getElementById('nextWord')
|
||||
const nextSentenceButton = document.getElementById('nextSentence')
|
||||
|
||||
const playButton = document.getElementById('play')
|
||||
|
||||
let chapter = new Chapter(inputText.value, 10)
|
||||
let player = new Player()
|
||||
|
||||
/**
 * Synchronises the controls and the word display with the current state of
 * `chapter` and `player`.
 */
function updateUI() {
  // A navigation button is only usable when the chapter can move that way.
  prevSentenceButton.disabled = !chapter.hasPrevSentence()
  prevWordButton.disabled = !chapter.hasPrevWord()
  nextWordButton.disabled = !chapter.hasNextWord()
  nextSentenceButton.disabled = !chapter.hasNextSentence()

  playButton.innerText = player.playing ? 'pause' : 'start'

  // An empty chapter has no current segment; setAttribute would stringify
  // `undefined` and the reader would show the literal word "undefined".
  output.setAttribute('word', chapter.currentSegment || '')
}
|
||||
|
||||
/**
 * Player callback: moves the chapter one segment forward, or stops the
 * player when the last segment has been reached, then refreshes the UI.
 */
function tick() {
  if (chapter.hasNext()) {
    chapter.next()
  } else {
    player.stop()
  }
  updateUI()
}
|
||||
|
||||
/**
 * Click handler shared by all `.action` buttons. The `action` attribute of
 * the event target selects the operation; unknown values do nothing.
 * The UI is refreshed after every click.
 * @param {Event} e
 */
function handleClick(e) {
  const action = e.target.getAttribute('action')

  if (action === 'load') {
    chapter = new Chapter(inputText.value, 10)
  } else if (action === 'prevSentence') {
    chapter.prevSentence()
  } else if (action === 'nextSentence') {
    chapter.nextSentence()
  } else if (action === 'prevWord') {
    chapter.prevWord()
  } else if (action === 'nextWord') {
    chapter.nextWord()
  } else if (action === 'play-pause') {
    player.toggle()
  }

  updateUI()
}
|
||||
|
||||
for (let button of document.getElementsByClassName('action')) {
|
||||
button.onclick = handleClick
|
||||
}
|
||||
player.subscribe('main', tick)
|
||||
updateUI()
|
52
spec/Chapter.spec.js
Normal file
52
spec/Chapter.spec.js
Normal file
@ -0,0 +1,52 @@
|
||||
import { Chapter, _privates } from '../src/Chapter.js'
|
||||
|
||||
const { getNextBiggerNumber } = _privates
|
||||
|
||||
describe('Chapter', function() {
  const demoText =
    'Hello World. Foo bar baz. Lorem ipsum dolor sit. Worttrennungsalgorithmus.'

  // Counts the starting position plus every successful step.
  function countPositions(step) {
    let count = 1
    while (step()) count++
    return count
  }

  // Steps until a falsy value comes back and returns that value.
  function exhaust(step) {
    let last
    while ((last = step())) {}
    return last
  }

  it('Iterates through segments', function() {
    const chapter = new Chapter(demoText, 7)
    expect(countPositions(() => chapter.next())).toBe(13)
  })

  it('Iterates through words', function() {
    const chapter = new Chapter(demoText, 7)
    expect(countPositions(() => chapter.nextWord())).toBe(10)
  })

  it('Iterates through sentences', function() {
    const chapter = new Chapter(demoText)
    expect(countPositions(() => chapter.nextSentence())).toBe(4)
  })

  it('Iterators return null on finish', function() {
    const chapter = new Chapter(demoText, 7)
    expect(exhaust(() => chapter.next())).toBe(null)
    expect(exhaust(() => chapter.prev())).toBe(null)
    expect(exhaust(() => chapter.nextWord())).toBe(null)
    expect(exhaust(() => chapter.prevWord())).toBe(null)
    expect(exhaust(() => chapter.nextSentence())).toBe(null)
    expect(exhaust(() => chapter.prevSentence())).toBe(null)
  })
})
|
||||
|
||||
describe('nextBiggerNumber', function() {
  it('Returns the next bigger number', function() {
    expect(getNextBiggerNumber(5, [1, 4, 6])).toBe(6)
  })

  // The helper is documented to return null when nothing bigger exists.
  it('Returns null if there is no bigger number', function() {
    expect(getNextBiggerNumber(6, [1, 4, 6])).toBe(null)
  })
})
|
@ -1,11 +0,0 @@
|
||||
import { breakText } from '../src/textProcessing/breakText.js'
|
||||
|
||||
describe('breakText', function() {
|
||||
it('returns an array', function() {
|
||||
expect(Array.isArray(breakText('Hello World'))).toBeTruthy()
|
||||
})
|
||||
|
||||
it('array is of appropriate length', function() {
|
||||
expect(breakText('Hello World').length).toBe(2)
|
||||
})
|
||||
})
|
@ -1,51 +0,0 @@
|
||||
import {
|
||||
breakWordSegment,
|
||||
splitLongWord,
|
||||
breakWord
|
||||
} from '../src/textProcessing/breakWord.js'
|
||||
|
||||
describe('breakWordSegment', function() {
|
||||
it('returns an array', function() {
|
||||
expect(Array.isArray(breakWordSegment('asdf'))).toBeTruthy()
|
||||
})
|
||||
it('returns triples', function() {
|
||||
expect(breakWordSegment('asdf').length).toBe(3)
|
||||
})
|
||||
})
|
||||
|
||||
describe('splitLongWord', function() {
|
||||
it('returns an array', function() {
|
||||
expect(Array.isArray(splitLongWord('asdf'))).toBeTruthy()
|
||||
})
|
||||
|
||||
it('returns the single word by default', function() {
|
||||
let segments = splitLongWord('asdf')
|
||||
expect(segments.length).toBe(1)
|
||||
expect(segments[0]).toBe('asdf')
|
||||
})
|
||||
|
||||
it('returns small words unmodified', function() {
|
||||
let segments = splitLongWord('asdf')
|
||||
expect(segments.length).toBe(1)
|
||||
expect(segments[0]).toBe('asdf')
|
||||
})
|
||||
|
||||
it('splits long words', function() {
|
||||
let segments = splitLongWord('asdf', 3)
|
||||
expect(segments.length).toBe(2)
|
||||
expect(segments[0]).toBe('asd')
|
||||
expect(segments[1]).toBe('f')
|
||||
})
|
||||
|
||||
it('split into even parts', function() {
|
||||
let segments = splitLongWord('1234567890', 9)
|
||||
expect(segments[0].length).toBe(5)
|
||||
expect(segments[1].length).toBe(5)
|
||||
})
|
||||
})
|
||||
|
||||
describe('breakWord', function() {
|
||||
it('returns an array', function() {
|
||||
expect(Array.isArray(breakWord('asdf'))).toBeTruthy()
|
||||
})
|
||||
})
|
@ -1,23 +0,0 @@
|
||||
import {
|
||||
findSentences
|
||||
} from '../src/textProcessing/findSentences.js'
|
||||
|
||||
describe('findSentences', function () {
|
||||
it('returns an array', function () {
|
||||
expect(Array.isArray(findSentences(['Hello', 'World']))).toBeTruthy()
|
||||
})
|
||||
|
||||
it('finds a single sentence', function () {
|
||||
let sentences = findSentences(['Hello'], ['World'])
|
||||
expect(sentences.length).toBe(1)
|
||||
expect(sentences[0]).toBe(0)
|
||||
})
|
||||
|
||||
it('finds two sentences', function () {
|
||||
let sentences = findSentences(['Hello', 'World.', 'Foo', 'bar.'])
|
||||
expect(sentences.length).toBe(2)
|
||||
expect(sentences[0]).toBe(0)
|
||||
expect(sentences[1]).toBe(2)
|
||||
})
|
||||
|
||||
})
|
@ -1,4 +1,3 @@
|
||||
import './breakText.spec.js'
|
||||
import './breakWord.spec.js'
|
||||
import './Chapter.spec.js'
|
||||
import './findPivot.spec.js'
|
||||
import './findSentences.spec.js'
|
||||
import './parseText.spec.js'
|
||||
|
47
spec/parseText.spec.js
Normal file
47
spec/parseText.spec.js
Normal file
@ -0,0 +1,47 @@
|
||||
import { parseText, _privates } from '../src/textProcessing/parseText.js'
|
||||
|
||||
const { splitLongWord } = _privates
|
||||
|
||||
describe('parseText', function() {
  it('returns an object with expected properties', function() {
    // Four short words, two sentences: every word is its own segment.
    const { segments, words, sentences } = parseText(
      'Hello World. Test Sentence.'
    )
    expect(segments).toEqual(['Hello', 'World.', 'Test', 'Sentence.'])
    expect(words).toEqual([0, 1, 2, 3])
    expect(sentences).toEqual([0, 2])
  })
})
|
||||
|
||||
describe('splitLongWord', function() {
  it('returns an array', function() {
    expect(Array.isArray(splitLongWord('asdf'))).toBeTruthy()
  })

  it('returns the single word by default', function() {
    expect(splitLongWord('asdf')).toEqual(['asdf'])
  })

  it('returns small words unmodified', function() {
    expect(splitLongWord('asdf', 5)).toEqual(['asdf'])
    expect(splitLongWord('asdf', 4)).toEqual(['asdf'])
  })

  it('splits long words', function() {
    expect(splitLongWord('asdf', 3)).toEqual(['as', 'df'])
  })

  it('split into even parts', function() {
    expect(splitLongWord('1234567890', 9)).toEqual(['12345', '67890'])
  })

  it('corner case: uneven length', function() {
    expect(splitLongWord('123456789', 8)).toEqual(['1234', '56789'])
  })

  it('corner case: multiple uneven parts', function() {
    let word = '1234567890123'
    let segments = splitLongWord(word, 3)
    expect(segments.reduce((x, y) => x + y, '')).toBe(word)
    expect(Math.max(...segments.map(seg => seg.length))).toBe(3)
    // An expect() without a matcher asserts nothing; require that no
    // fragment is empty.
    expect(Math.min(...segments.map(seg => seg.length))).toBeGreaterThan(0)
  })
})
|
127
src/Chapter.js
127
src/Chapter.js
@ -1,66 +1,119 @@
|
||||
import {
|
||||
findSentences
|
||||
} from "./textProcessing/findSentences";
|
||||
import { parseText } from './textProcessing/parseText.js'
|
||||
|
||||
/**
 * Cursor over a parsed text.
 *
 * parseText() splits the text into displayable segments and records at which
 * segment indices words and sentences begin. `currentIdx` is the index of the
 * segment currently shown. Every navigation method returns the new current
 * segment, or null (without moving) when there is nowhere to go.
 */
export class Chapter {
  /**
   * @param {String} text text to read; defaults to an empty chapter
   * @param {Number} maxLength maximum segment length handed to parseText,
   *   -1 to leave long words unsplit
   */
  constructor(text = '', maxLength = -1) {
    const { segments, words, sentences } = parseText(text, maxLength)
    this.segments = segments
    this.words = words
    this.sentences = sentences
    this.currentIdx = 0
  }

  /** Segment at the cursor; undefined when the chapter has no segments. */
  get currentSegment() {
    return this.segments[this.currentIdx]
  }

  /** Counts describing the chapter and the 1-based cursor position. */
  get metainfo() {
    return {
      segmentCount: this.segments.length,
      wordsCount: this.words.length,
      sentenceCount: this.sentences.length,
      currentSegment: this.currentIdx + 1,
      // TODO: word and sentence positions are not computed yet.
      currentWord: -1,
      currentSentence: -1
    }
  }

  /** Moves one segment forward. */
  next() {
    if (!this.hasNext()) return null
    this.currentIdx += 1
    return this.currentSegment
  }

  /** Moves one segment back. */
  prev() {
    if (!this.hasPrev()) return null
    this.currentIdx -= 1
    return this.currentSegment
  }

  /** Jumps to the first segment of the next word. */
  nextWord() {
    if (!this.hasNextWord()) return null
    this.currentIdx = getNextBiggerNumber(this.currentIdx, this.words)
    return this.currentSegment
  }

  /** Jumps to the closest word beginning before the cursor. */
  prevWord() {
    if (!this.hasPrevWord()) return null
    this.currentIdx = getNextSmallerNumber(this.currentIdx, this.words)
    return this.currentSegment
  }

  /** Jumps to the first segment of the next sentence. */
  nextSentence() {
    if (!this.hasNextSentence()) return null
    this.currentIdx = getNextBiggerNumber(this.currentIdx, this.sentences)
    return this.currentSegment
  }

  /** Jumps to the closest sentence beginning before the cursor. */
  prevSentence() {
    if (!this.hasPrevSentence()) return null
    this.currentIdx = getNextSmallerNumber(this.currentIdx, this.sentences)
    return this.currentSegment
  }

  hasNext() {
    return this.currentIdx < this.segments.length - 1
  }

  hasPrev() {
    return this.currentIdx > 0
  }

  hasNextWord() {
    return this.currentIdx < lastEntry(this.words)
  }

  hasPrevWord() {
    return this.currentIdx > this.words[0]
  }

  hasNextSentence() {
    return this.currentIdx < lastEntry(this.sentences)
  }

  hasPrevSentence() {
    return this.currentIdx > this.sentences[0]
  }
}
|
||||
/**
 * Finds the first entry of an ascending array that is strictly greater than
 * the given number.
 * @param {Number} num
 * @param {Array<Number>} sortedArray
 * @returns {Number|null} the entry, or null when no entry is bigger than num
 */
function getNextBiggerNumber(num, sortedArray) {
  const match = sortedArray.find(candidate => candidate > num)
  return match === undefined ? null : match
}
|
||||
|
||||
/**
 * Finds the last entry of an ascending array that is strictly smaller than
 * the given number.
 * @param {Number} num
 * @param {Array<Number>} sortedArray
 * @returns {Number|null} the entry, or null when no entry is smaller than num
 */
function getNextSmallerNumber(num, sortedArray) {
  // Walk from the back so the first hit is the closest smaller entry.
  for (let i = sortedArray.length - 1; i >= 0; i--) {
    const candidate = sortedArray[i]
    if (candidate < num) return candidate
  }
  return null
}
|
||||
|
||||
/**
 * Returns the final element of an array (undefined for an empty array).
 * @param {Array} arr
 */
function lastEntry(arr) {
  const lastIndex = arr.length - 1
  return arr[lastIndex]
}
|
||||
|
||||
export const _privates = { getNextBiggerNumber }
|
||||
|
45
src/Player.js
Normal file
45
src/Player.js
Normal file
@ -0,0 +1,45 @@
|
||||
/**
 * Interval-driven ticker. While playing, every registered subscriber
 * callback is invoked once per interval.
 */
export class Player {
  /**
   * @param {Number} interval delay between ticks, passed to setInterval
   */
  constructor(interval = 100) {
    this.intervalHandle = null
    this.interval = interval
    // Callbacks keyed by subscriber name; subscribing again under the same
    // name replaces the previous callback.
    this.subscribers = {}
  }

  /** True while an interval timer is active. */
  get playing() {
    return this.intervalHandle !== null
  }

  /** Starts ticking; a timer that is already running is replaced. */
  start() {
    clearInterval(this.intervalHandle)
    this.intervalHandle = setInterval(this.tick.bind(this), this.interval)
  }

  /** Stops ticking. */
  stop() {
    clearInterval(this.intervalHandle)
    this.intervalHandle = null
  }

  /** Starts when stopped, stops when playing. */
  toggle() {
    if (this.playing) this.stop()
    else this.start()
  }

  /**
   * Changes the tick interval. A running timer is restarted so the new
   * interval applies immediately; a stopped player stays stopped.
   * @param {Number} interval
   */
  updateInterval(interval) {
    this.interval = interval
    if (this.playing) this.start()
  }

  /** Invokes every subscriber callback once. */
  tick() {
    for (const callback of Object.values(this.subscribers)) {
      callback()
    }
  }

  /**
   * @param {String} name key identifying the subscriber
   * @param {Function} callback called without arguments on every tick
   */
  subscribe(name, callback) {
    this.subscribers[name] = callback
  }

  /**
   * Removes the callback registered under the given name, if any.
   * @param {String} name
   */
  unsubscribe(name) {
    delete this.subscribers[name]
  }
}
|
@ -1,9 +0,0 @@
|
||||
export function breakText(text) {
|
||||
let words = []
|
||||
for (let word of text.trim().split(/[ \t\n]/)) {
|
||||
if (word.trim() !== '') {
|
||||
words.push(word)
|
||||
}
|
||||
}
|
||||
return words
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
import { findPivot } from './findPivot.js'
|
||||
|
||||
export function breakWordSegment(word, mode) {
|
||||
let start = 0
|
||||
let end = word.length
|
||||
let pivot = findPivot(word)
|
||||
return [
|
||||
word.slice(start, pivot),
|
||||
word.slice(pivot, pivot + 1),
|
||||
word.slice(pivot + 1, end)
|
||||
]
|
||||
}
|
||||
|
||||
export function splitLongWord(word, maxLength) {
|
||||
if (maxLength === -1) return [word]
|
||||
let segments = []
|
||||
let segmentStart = 0
|
||||
let cur = word.slice(segmentStart, maxLength)
|
||||
while (cur !== '') {
|
||||
segments.push(cur)
|
||||
segmentStart += maxLength
|
||||
cur = word.slice(segmentStart, segmentStart + maxLength)
|
||||
}
|
||||
return segments
|
||||
}
|
||||
|
||||
export function breakWord(word, mode, maxLength = -1) {
|
||||
return splitLongWord(word, maxLength).map(word =>
|
||||
breakWordSegment(word, mode)
|
||||
)
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
export function findPivot(word) {
|
||||
const table = {
|
||||
1: 0,
|
||||
2: 0,
|
||||
3: 1,
|
||||
4: 1,
|
||||
5: 2,
|
||||
6: 2,
|
||||
7: 2,
|
||||
8: 3,
|
||||
9: 3
|
||||
}
|
||||
return typeof table[word.length] === 'undefined' ? 4 : table[word.length]
|
||||
|
||||
}
|
@ -1,14 +0,0 @@
|
||||
export function findSentences(words) {
|
||||
let sentences = []
|
||||
let sentenceFlag = true
|
||||
for (let [idx, word] of words.entries()) {
|
||||
if (sentenceFlag) {
|
||||
sentences.push(idx)
|
||||
sentenceFlag = false
|
||||
}
|
||||
if (word.endsWith('.')) {
|
||||
sentenceFlag = true
|
||||
}
|
||||
}
|
||||
return sentences
|
||||
}
|
76
src/textProcessing/parseText.js
Normal file
76
src/textProcessing/parseText.js
Normal file
@ -0,0 +1,76 @@
|
||||
/**
 * Segments a text for display and records where words and sentences begin.
 *
 * The result holds `segments` (every word, with long words split into
 * fragments), `words` (segment index at which each word begins) and
 * `sentences` (segment index at which each sentence begins). A sentence
 * begins at the first word and after every word that ends with '.'.
 * @param {String} text
 * @param {Number} maxLength
 * @returns {Object}
 */
export function parseText(text, maxLength) {
  const segments = []
  const words = []
  const sentences = []
  let startsSentence = true

  for (const word of extractWords(text)) {
    // The next free slot in `segments` is where this word will begin.
    const wordStart = segments.length
    words.push(wordStart)
    if (startsSentence) sentences.push(wordStart)

    // A long word contributes several segments, a short one exactly one.
    segments.push(...splitLongWord(word, maxLength))

    // A trailing full stop means the following word opens a new sentence.
    startsSentence = word.endsWith('.')
  }

  return { segments, words, sentences }
}
|
||||
|
||||
/**
 * Returns an Array of words from a text. Words are separated by any run of
 * whitespace (spaces, tabs, line feeds, carriage returns, ...), so text with
 * Windows line endings does not leave a trailing '\r' on words.
 * @param {String} text
 * @returns {Array<String>} the words in order; empty for blank text
 */
function extractWords(text) {
  // ''.split(...) yields [''], so empty entries still have to be dropped.
  return text
    .trim()
    .split(/\s+/)
    .filter(word => word !== '')
}
|
||||
|
||||
/**
 * Splits a word evenly into the smallest number of fragments whose length
 * does not exceed maxLength. Every fragment except the last gets a '-'
 * appended (the hyphen is not counted against maxLength).
 * NOTE(review): the spec in this commit expects fragments without the
 * hyphen; confirm which behaviour is intended.
 * Todo: more intelligent hyphenation algorithm
 * @param {String} word
 * @param {Number} maxLength maximum fragment length; -1 (or any value below
 *   1) disables splitting
 * @returns {Array<String>} word fragments
 */
function splitLongWord(word, maxLength = -1) {
  // Anything that is not a limit of at least 1 (the -1 default, 0, NaN)
  // means "do not split"; a limit of 0 could never make progress.
  if (!(maxLength >= 1)) return [word]
  if (word.length <= maxLength) return [word]

  // ceil gives the fewest parts that fit, also for exact multiples.
  const numParts = Math.ceil(word.length / maxLength)
  const fragments = []
  for (let part = 0; part < numParts; part++) {
    // Integer boundaries computed per part avoid accumulating float error.
    const start = Math.floor((part * word.length) / numParts)
    const end = Math.floor(((part + 1) * word.length) / numParts)
    fragments.push(word.slice(start, end))
  }

  for (let i = 0; i < fragments.length - 1; i++) {
    fragments[i] = fragments[i] + '-'
  }
  return fragments
}
|
||||
|
||||
export const _privates = { extractWords, splitLongWord }
|
56
src/textProcessing/pivotize.js
Normal file
56
src/textProcessing/pivotize.js
Normal file
@ -0,0 +1,56 @@
|
||||
/**
 * Returns the index of the pivot letter for a word, chosen by word length:
 * lengths 0-2 -> 0, 3-5 -> 1, 6-9 -> 2, 10-15 -> 3, 16-23 -> 4 and anything
 * longer -> 5.
 * @param {String} word
 * @returns {Number}
 */
function calculatePivot(word) {
  // Lookup table indexed by word length, built from run lengths.
  const pivotByLength = [
    ...Array(3).fill(0),
    ...Array(3).fill(1),
    ...Array(4).fill(2),
    ...Array(6).fill(3),
    ...Array(8).fill(4)
  ]
  const pivot = pivotByLength[word.length]
  if (pivot === undefined) return 5
  return pivot
}
|
||||
|
||||
/**
 * Returns a pivot index for a word based on its length: a fixed value for
 * lengths 1 to 10 and 4 for every other length (including 0).
 * @param {String} word
 * @returns {Number}
 */
export function pivotIdx(word) {
  // Position = word length. Slot 0 carries the fallback value because the
  // empty word is treated like any other length without its own entry.
  const byLength = [4, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3]
  const idx = byLength[word.length]
  return idx === undefined ? 4 : idx
}
|
||||
|
||||
/**
 * Splits a word around its pivot letter.
 * @param {String} word
 * @param {*} mode currently unused
 * @returns {Array<String>} [text before the pivot, pivot letter, text after]
 */
export function pivotize(word, mode) {
  const pivotAt = calculatePivot(word)
  const before = word.slice(0, pivotAt)
  const letter = word.slice(pivotAt, pivotAt + 1)
  const after = word.slice(pivotAt + 1, word.length)
  return [before, letter, after]
}
|
Loading…
x
Reference in New Issue
Block a user