/**
 * Tokeniser state container.
 *
 * Sets up the input cursor, the output token list and the scratch slots
 * used while a token is being built. The input string itself is attached
 * later by tokenise().
 */
function Tokeniser() {
    // Character code used as the "end of input" sentinel.
    var eof = 0;
    this.Char_EOF = eof;

    // Starting state and content model; DataState and PCDATA are defined
    // outside this constructor.
    this.machineState = DataState;
    this.contentModel = PCDATA;

    // Index of the next character to read from the input stream.
    this.inputStreamPos = 0;

    // Tokens emitted so far.
    this.tokenStream = [];

    // Character data buffered until the next non-character token is emitted.
    this.characterToken = "";

    // Token currently under construction (none yet).
    this.currentToken = null;

    this.currentCharacter = eof;
}

/**
 * Tokenise a string and return the resulting token stream.
 *
 * The input is preprocessed first: CR LF pairs and lone CRs become LF,
 * and every NUL character is replaced by U+FFFD with one parse error
 * reported per NUL. The cleaned string is stored as this.inputStream and
 * this.run() (defined elsewhere) is invoked.
 *
 * @param {string} str - Raw input text.
 * @returns {Array} this.tokenStream after run() has finished.
 */
Tokeniser.prototype.tokenise = function (str) {
    // The replacer callback is not invoked with the tokeniser as `this`,
    // so keep a reference; parseError is an instance method, not a global.
    var self = this;
    str = str.replace(/\r\n?/g, "\n").replace(/\0/g, function () {
        self.parseError();
        return "\uFFFD";
    });
    this.inputStream = str;

    this.run();
    return this.tokenStream;
};

/**
 * Append a token to the output stream.
 *
 * If character data has been buffered in this.characterToken it is
 * flushed first as a single ["Character", text] token and the buffer is
 * cleared, so character runs always precede the token that ended them.
 *
 * @param {*} t - Token to append.
 */
Tokeniser.prototype.emitToken = function (t) {
    var buffered = this.characterToken;
    if (buffered.length) {
        this.characterToken = "";
        this.tokenStream.push(["Character", buffered]);
    }
    this.tokenStream.push(t);
};

/**
 * Step the input cursor back by one so the last consumed character is
 * read again by the next consumeCharacter() call.
 */
Tokeniser.prototype.reconsumeCharacter = function () {
    this.inputStreamPos -= 1;
};
/**
 * Append one character to the pending character-data buffer.
 *
 * The buffer is flushed into the token stream by emitToken().
 *
 * @param {number} c - Character code. Values above 0xFFFF (which
 *   hasConsumableEntity can produce from numeric references) are encoded
 *   as a UTF-16 surrogate pair; String.fromCharCode alone would truncate
 *   them to 16 bits.
 */
Tokeniser.prototype.emitCharacterToken = function (c) {
    if (c > 0xFFFF) {
        var offset = c - 0x10000;
        this.characterToken += String.fromCharCode(
            0xD800 + (offset >> 10),
            0xDC00 + (offset & 0x3FF)
        );
        return;
    }
    this.characterToken += String.fromCharCode(c);
};
/**
 * Emit the end-of-file marker, represented by the string "EOF".
 */
Tokeniser.prototype.emitEOFToken = function () {
    var eofToken = "EOF";
    this.emitToken(eofToken);
};
/**
 * Read the next character code from the input and advance the cursor.
 *
 * The cursor is advanced even when the input is exhausted, so that a
 * following reconsumeCharacter() stays balanced.
 *
 * @returns {number} The UTF-16 code unit at the cursor, or this.Char_EOF
 *   when the cursor is at or past the end of the input.
 */
Tokeniser.prototype.consumeCharacter = function () {
    var at = this.inputStreamPos++;
    if (at < this.inputStream.length) {
        return this.inputStream.charCodeAt(at);
    }
    return this.Char_EOF;
};
/**
 * Placeholder: currently a no-op (the body is empty).
 */
Tokeniser.prototype.consumeAndEmitEntity = function () {
    // No behaviour implemented.
};
/**
 * Placeholder: currently a no-op (the body is empty).
 */
Tokeniser.prototype.consumeAndAppendEntity = function () {
    // No behaviour implemented.
};
/**
 * Begin a new start-tag token and make it the current token.
 *
 * Layout: ["StartTag", name, attributes, flag]. Attributes are
 * [name, value] pairs; the flag at index 3 is set when the latest
 * attribute was dropped as a duplicate, so its value is ignored.
 */
Tokeniser.prototype.createStartTagToken = function () {
    var tagName = "";
    var attributes = [];
    var ignoreAttributeValue = false;
    this.currentToken = ["StartTag", tagName, attributes, ignoreAttributeValue];
};
/**
 * Begin a new end-tag token and make it the current token.
 *
 * Layout: ["EndTag", name, attributes, flag], the same shape as a
 * start-tag token.
 */
Tokeniser.prototype.createEndTagToken = function () {
    var tagName = "";
    var attributes = [];
    var ignoreAttributeValue = false;
    this.currentToken = ["EndTag", tagName, attributes, ignoreAttributeValue];
};
/**
 * Start a new, empty attribute on the current tag token.
 *
 * Pushes a ["", ""] name/value pair and clears the flag at index 3 so
 * value characters for the new attribute are accepted again.
 */
Tokeniser.prototype.createTagTokenAttribute = function () {
    var token = this.currentToken;
    token[2].push(["", ""]);
    token[3] = false;
};
/**
 * Begin a new comment token, ["Comment", text], with empty text and make
 * it the current token.
 */
Tokeniser.prototype.createCommentToken = function () {
    var text = "";
    this.currentToken = ["Comment", text];
};
/**
 * Begin a new DOCTYPE token and make it the current token.
 *
 * Layout: ["DOCTYPE", name, publicId, systemId, flag]. Both identifiers
 * start as null and the flag starts as true; setDoctypeTokenIncorrect()
 * switches it to false.
 */
Tokeniser.prototype.createDoctypeToken = function () {
    var name = "";
    var publicId = null;
    var systemId = null;
    var correct = true;
    this.currentToken = ["DOCTYPE", name, publicId, systemId, correct];
};
/**
 * Emit the tag token currently under construction via emitToken().
 */
Tokeniser.prototype.emitCurrentTagToken = function () {
    var token = this.currentToken;
    this.emitToken(token);
};
/**
 * Emit the comment token currently under construction via emitToken().
 */
Tokeniser.prototype.emitCurrentCommentToken = function () {
    var token = this.currentToken;
    this.emitToken(token);
};
/**
 * Emit the DOCTYPE token currently under construction via emitToken().
 */
Tokeniser.prototype.emitCurrentDoctypeToken = function () {
    var token = this.currentToken;
    this.emitToken(token);
};
/**
 * Drop the most recently added attribute if its name repeats an earlier
 * attribute on the current tag token.
 *
 * On a duplicate: a parse error is reported, the flag at index 3 is set
 * (so appendToTagTokenAttributeValue ignores the value that follows) and
 * the newest attribute is removed. Expects at least one attribute to be
 * present on the token.
 */
Tokeniser.prototype.handleDuplicateAttributes = function () {
    var attributes = this.currentToken[2];
    var newestIndex = attributes.length - 1;
    var newestName = attributes[newestIndex][0];

    for (var idx = 0; idx < newestIndex; ++idx) {
        if (attributes[idx][0] == newestName) {
            this.parseError();
            this.currentToken[3] = true;
            attributes.pop();
            return;
        }
    }
};
/**
 * Append one character to the name of the current tag token.
 *
 * @param {number} c - UTF-16 code unit to append.
 */
Tokeniser.prototype.appendToTagTokenName = function (c) {
    var token = this.currentToken;
    token[1] = token[1] + String.fromCharCode(c);
};
/**
 * Append one character to the name of the newest attribute on the
 * current tag token.
 *
 * @param {number} c - UTF-16 code unit to append.
 */
Tokeniser.prototype.appendToTagTokenAttributeName = function (c) {
    var attributes = this.currentToken[2];
    var newest = attributes[attributes.length - 1];
    newest[0] = newest[0] + String.fromCharCode(c);
};
/**
 * Append one character to the value of the newest attribute on the
 * current tag token.
 *
 * Nothing is appended while the flag at index 3 is set, i.e. after
 * handleDuplicateAttributes() discarded the attribute being parsed.
 *
 * @param {number} c - Character code. Values above 0xFFFF (which
 *   hasConsumableEntity can produce from numeric references) are encoded
 *   as a UTF-16 surrogate pair; String.fromCharCode alone would truncate
 *   them to 16 bits.
 */
Tokeniser.prototype.appendToTagTokenAttributeValue = function (c) {
    if (this.currentToken[3]) {
        return;
    }
    var text;
    if (c > 0xFFFF) {
        var offset = c - 0x10000;
        text = String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
    } else {
        text = String.fromCharCode(c);
    }
    var attributes = this.currentToken[2];
    attributes[attributes.length - 1][1] += text;
};
/**
 * Append one character to the text of the current comment token.
 *
 * @param {number} c - UTF-16 code unit to append.
 */
Tokeniser.prototype.appendToCommentToken = function (c) {
    var token = this.currentToken;
    token[1] = token[1] + String.fromCharCode(c);
};
/**
 * Append one character to the name of the current DOCTYPE token.
 *
 * @param {number} c - UTF-16 code unit to append.
 */
Tokeniser.prototype.appendToDoctypeTokenName = function (c) {
    var token = this.currentToken;
    token[1] = token[1] + String.fromCharCode(c);
};
/**
 * Append one character to the public identifier (index 2) of the current
 * DOCTYPE token.
 *
 * The slot is null right after createDoctypeToken(); appending to null
 * would yield the text "null" followed by the character, so
 * setDoctypeTokenPubIdEmpty() is expected to run first.
 * NOTE(review): confirm every caller does so.
 *
 * @param {number} c - UTF-16 code unit to append.
 */
Tokeniser.prototype.appendToDoctypeTokenPubId = function (c) {
    var token = this.currentToken;
    token[2] = token[2] + String.fromCharCode(c);
};
/**
 * Append one character to the system identifier (index 3) of the current
 * DOCTYPE token.
 *
 * The slot is null right after createDoctypeToken(); appending to null
 * would yield the text "null" followed by the character, so
 * setDoctypeTokenSysIdEmpty() is expected to run first.
 * NOTE(review): confirm every caller does so.
 *
 * @param {number} c - UTF-16 code unit to append.
 */
Tokeniser.prototype.appendToDoctypeTokenSysId = function (c) {
    var token = this.currentToken;
    token[3] = token[3] + String.fromCharCode(c);
};
/**
 * Mark the current DOCTYPE token as incorrect by clearing the flag at
 * index 4 (it starts as true in createDoctypeToken()).
 */
Tokeniser.prototype.setDoctypeTokenIncorrect = function () {
    var token = this.currentToken;
    token[4] = false;
};
/**
 * Set the public identifier (index 2) of the current DOCTYPE token to
 * the empty string, replacing whatever was there (initially null).
 */
Tokeniser.prototype.setDoctypeTokenPubIdEmpty = function () {
    var token = this.currentToken;
    token[2] = "";
};
/**
 * Set the system identifier (index 3) of the current DOCTYPE token to
 * the empty string, replacing whatever was there (initially null).
 */
Tokeniser.prototype.setDoctypeTokenSysIdEmpty = function () {
    var token = this.currentToken;
    token[3] = "";
};
/**
 * Report a parse error by emitting the string token "ParseError" into
 * the token stream through emitToken().
 */
Tokeniser.prototype.parseError = function () {
    var marker = "ParseError";
    this.emitToken(marker);
};
/**
 * Report a parse error unless the slash just seen is a permitted one.
 *
 * The slash is permitted only when all of these hold: the next input
 * character is ">", the current token is a start tag, and its name is
 * one of the listed elements (base, link, meta, hr, br, img, embed,
 * param, area, col, input).
 */
Tokeniser.prototype.parseErrorIfNonpermittedSlash = function () {
    if (this.inputStream[this.inputStreamPos] == '>') {
        var token = this.currentToken;
        if (token[0] == 'StartTag' &&
            token[1].match(/^(base|link|meta|hr|br|img|embed|param|area|col|input)$/)) {
            return;
        }
    }
    this.parseError();
};
/**
 * Report a parse error when the current token is an end tag that
 * carries at least one attribute.
 */
Tokeniser.prototype.parseErrorIfEndTagWithAttributes = function () {
    var token = this.currentToken;
    if (token[0] != 'EndTag') {
        return;
    }
    if (token[2].length) {
        this.parseError();
    }
};
/**
 * Look back at an already-consumed character.
 *
 * @param {number} n - How far back to look: 0 is the character just
 *   before the cursor, 1 the one before that, and so on.
 * @returns {number} The code unit at index (cursor - n - 1), or 0 when n
 *   reaches back past the start of the input.
 */
Tokeniser.prototype.getOldCharacter = function (n) {
    if (n >= this.inputStreamPos) {
        return 0;
    }
    return this.inputStream.charCodeAt(this.inputStreamPos - n - 1);
};
/**
 * Test whether the input, starting at the character just before the
 * cursor, spells out the given string.
 *
 * Only the input side is lower-cased before comparing, so `s` must
 * already be lower case for a match to be possible.
 *
 * @param {string} s - Lower-case text to look for.
 * @returns {boolean} True when the next s.length characters match.
 */
Tokeniser.prototype.isFollowedBy = function (s) {
    var start = this.inputStreamPos - 1;
    var upcoming = this.inputStream.substr(start, s.length);
    return upcoming.toLowerCase() == s;
};
/**
 * Stub: always reports that the cursor is not at the end of a CDATA
 * section.
 *
 * @returns {boolean} Always false.
 */
Tokeniser.prototype.isEndOfCData = function () {
    // XXX: not implemented; unconditionally false.
    return false;
};

/**
 * Try to consume a character reference at the cursor.
 *
 * Presumably the cursor sits just after an "&" — verify against the
 * callers. Three forms are recognised: decimal ("#123;"), hexadecimal
 * ("#x7B;") and named (matched by this.entityNameMatch). A missing
 * trailing ";" is a parse error but the reference is still consumed,
 * except for a named reference in an attribute that
 * this.entityNameMatchAttr rejects.
 *
 * Numeric values found in this.entityMap are remapped; zero, values
 * above 0x10FFFF and surrogates become U+FFFD. Both cases are parse
 * errors.
 *
 * On success the character goes to appendToTagTokenAttributeValue()
 * (attr truthy) or emitCharacterToken(), and the cursor moves past the
 * reference.
 *
 * @param {*} attr - Truthy when parsing inside an attribute value.
 * @returns {boolean} True when a reference was consumed; false when the
 *   cursor was left untouched.
 */
Tokeniser.prototype.hasConsumableEntity = function (attr) {
    var rest = this.inputStream.substr(this.inputStreamPos);

    // Whitespace, "<", "&" or end of input: not a reference at all.
    if (rest.match(/^([\t\u000A\u000B\r <&]|$)/)) {
        return false;
    }

    var code;
    var consumed;

    // Numeric forms: decimal is tried first, then hexadecimal.
    var numeric = rest.match(/^#([0-9]+)(;?)/);
    if (numeric) {
        code = +numeric[1];
    } else {
        numeric = rest.match(/^#[xX]([0-9A-Fa-f]+)(;?)/);
        if (numeric) {
            code = +("0x" + numeric[1]);
        } else if (rest.match(/^#/)) {
            // "#" with no digits after it.
            this.parseError();
            return false;
        }
    }

    if (numeric) {
        consumed = numeric[0].length;
        if (!numeric[2]) {
            // Missing terminating semicolon.
            this.parseError();
        }

        var remapped = this.entityMap[code];
        if (remapped) {
            this.parseError();
            code = remapped;
        } else if (code === 0 || code > 0x10FFFF || (code >= 0xD800 && code <= 0xDFFF)) {
            this.parseError();
            code = 0xFFFD;
        }
    } else {
        var named = this.entityNameMatch.exec(rest);
        if (!named) {
            this.parseError();
            return false;
        }

        var entityName = named[1];
        if (entityName[entityName.length - 1] != ';') {
            this.parseError();
            if (attr && !this.entityNameMatchAttr.exec(rest)) {
                return false;
            }
        }
        code = this.entityNameValues[entityName];
        consumed = named[0].length;
    }

    if (attr) {
        this.appendToTagTokenAttributeValue(code);
    } else {
        this.emitCharacterToken(code);
    }
    this.inputStreamPos += consumed;
    return true;
};
