Utf8Decoder Class / dart:utf Library / Dart API Reference

Utf8Decoder Class

Provides an iterator of Unicode codepoints from UTF-8 encoded bytes. The parameters can set an offset into a list of bytes (as int), limit the length of the values to be decoded, and override the default Unicode replacement character. Set the replacementCodepoint to null to throw an IllegalArgumentException rather than replace the bad value. The return value from this method can be used as an Iterable (e.g. in a for-loop).

Implements

Iterator<E>

Constructors

Code new Utf8Decoder._fromListRangeIterator(_ListRange source, [int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) #

/// Creates a decoder that reads its bytes from the iterator of [source].
///
/// [replacementCodepoint] defaults to
/// [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT].
Utf8Decoder._fromListRangeIterator(
    _ListRange source,
    [int this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
    : utf8EncodedBytesIterator = source.iterator();

Code new Utf8Decoder(List<int> utf8EncodedBytes, [int offset = 0, int length, int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) #

/// Creates a decoder over [utf8EncodedBytes].
///
/// [offset] and [length] are handed to a [_ListRange] that selects the part
/// of the list to decode. [replacementCodepoint] defaults to
/// [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT].
Utf8Decoder(
    List<int> utf8EncodedBytes,
    [int offset = 0,
    int length,
    int this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
    : utf8EncodedBytesIterator =
          new _ListRange(utf8EncodedBytes, offset, length).iterator();

Methods

Code List<int> decodeRest() #

Decode the remainder of the characters in this decoder into a [List].

/// Decodes everything that is left in this decoder and returns the
/// codepoints as a list.
///
/// The result buffer is first sized to the number of bytes remaining in the
/// underlying iterator. If fewer codepoints than that are produced, they are
/// copied into a list of exactly the right length.
List<int> decodeRest() {
  List<int> buffer = new List<int>(utf8EncodedBytesIterator.remaining);
  int count = 0;
  for (; hasNext(); count++) {
    buffer[count] = next();
  }
  if (count != buffer.length) {
    // Fewer codepoints than bytes: trim the unused tail.
    List<int> exact = new List<int>(count);
    exact.setRange(0, count, buffer);
    return exact;
  }
  return buffer;
}

Code bool hasNext() #

/// Whether the underlying byte iterator still has bytes to decode.
bool hasNext() {
  return utf8EncodedBytesIterator.hasNext();
}

Code int next() #

/// Decodes and returns the next codepoint from the underlying byte iterator.
///
/// The first byte selects how many additional bytes are expected (0 to 5).
/// The result is rejected when:
///
/// * the first byte is negative, falls in the continuation-byte range, or is
///   at or above [_UTF8_FIRST_BYTE_BOUND_EXCL];
/// * fewer continuation bytes than expected follow;
/// * the decoded value lies within
///   [UNICODE_UTF16_RESERVED_LO]..[UNICODE_UTF16_RESERVED_HI];
/// * the encoding is overlong (sequences with 4 or 5 additional bytes are
///   never accepted);
/// * the decoded value exceeds [UNICODE_VALID_RANGE_MAX].
///
/// A rejected sequence yields [replacementCodepoint], or throws an
/// [IllegalArgumentException] if [replacementCodepoint] is null.
///
/// When a sequence is cut short by a non-negative byte that is not a
/// continuation byte, that byte is handed back to the iterator so that the
/// following call decodes it on its own.
int next() {
  int value = utf8EncodedBytesIterator.next();
  int additionalBytes = 0;

  if (value < 0) {
    if (replacementCodepoint != null) {
      return replacementCodepoint;
    } else {
      throw new IllegalArgumentException(
          "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");
    }
  } else if (value <= _UTF8_ONE_BYTE_MAX) {
    // Single-byte value: returned as is.
    return value;
  } else if (value < _UTF8_FIRST_BYTE_OF_TWO_BASE) {
    // A continuation byte cannot start a sequence.
    if (replacementCodepoint != null) {
      return replacementCodepoint;
    } else {
      throw new IllegalArgumentException(
          "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");
    }
  } else if (value < _UTF8_FIRST_BYTE_OF_THREE_BASE) {
    value -= _UTF8_FIRST_BYTE_OF_TWO_BASE;
    additionalBytes = 1;
  } else if (value < _UTF8_FIRST_BYTE_OF_FOUR_BASE) {
    value -= _UTF8_FIRST_BYTE_OF_THREE_BASE;
    additionalBytes = 2;
  } else if (value < _UTF8_FIRST_BYTE_OF_FIVE_BASE) {
    value -= _UTF8_FIRST_BYTE_OF_FOUR_BASE;
    additionalBytes = 3;
  } else if (value < _UTF8_FIRST_BYTE_OF_SIX_BASE) {
    value -= _UTF8_FIRST_BYTE_OF_FIVE_BASE;
    additionalBytes = 4;
  } else if (value < _UTF8_FIRST_BYTE_BOUND_EXCL) {
    value -= _UTF8_FIRST_BYTE_OF_SIX_BASE;
    additionalBytes = 5;
  } else if (replacementCodepoint != null) {
    return replacementCodepoint;
  } else {
    throw new IllegalArgumentException(
        "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");
  }
  // Fold the expected continuation bytes into the value, six bits at a time.
  int j = 0;
  while (j < additionalBytes && utf8EncodedBytesIterator.hasNext()) {
    int nextValue = utf8EncodedBytesIterator.next();
    if (nextValue > _UTF8_ONE_BYTE_MAX &&
        nextValue < _UTF8_FIRST_BYTE_OF_TWO_BASE) {
      value = ((value << 6) | (nextValue & _UTF8_LO_SIX_BIT_MASK));
    } else {
      // The sequence was cut short by a byte that is not a continuation
      // byte. Hand that byte back so the next call decodes it on its own.
      // This covers single-byte values as well as sequence-starting bytes;
      // consuming a single-byte value here would silently drop a valid
      // character that merely follows a malformed sequence. Negative values
      // stay consumed as part of the bad sequence.
      if (nextValue >= 0) {
        utf8EncodedBytesIterator.backup();
      }
      break;
    }
    j++;
  }
  // All expected continuation bytes were read and the value is outside the
  // reserved UTF-16 range.
  bool validSequence = (j == additionalBytes && (
      value < UNICODE_UTF16_RESERVED_LO ||
      value > UNICODE_UTF16_RESERVED_HI));
  // The value must need the number of bytes that was used to encode it.
  // Sequences with 4 or 5 additional bytes never satisfy this check.
  bool nonOverlong =
      (additionalBytes == 1 && value > _UTF8_ONE_BYTE_MAX) ||
      (additionalBytes == 2 && value > _UTF8_TWO_BYTE_MAX) ||
      (additionalBytes == 3 && value > _UTF8_THREE_BYTE_MAX);
  bool inRange = value <= UNICODE_VALID_RANGE_MAX;
  if (validSequence && nonOverlong && inRange) {
    return value;
  } else if (replacementCodepoint != null) {
    return replacementCodepoint;
  } else {
    throw new IllegalArgumentException(
        "Invalid UTF8 at ${utf8EncodedBytesIterator.position - j}");
  }
}

Fields

Code final int replacementCodepoint #

/// Codepoint that [next] returns in place of input it rejects as invalid.
///
/// When this is null, [next] throws an [IllegalArgumentException] instead.
final int replacementCodepoint;

Code final _ListRangeIterator utf8EncodedBytesIterator #

/// Iterator over the UTF-8 encoded input bytes; [hasNext] and [next] read
/// from it. Set by the constructors from a [_ListRange].
final _ListRangeIterator utf8EncodedBytesIterator;

dart:core

dart:coreimpl

dart:crypto

dart:html

dart:io

dart:isolate

dart:json

dart:uri

dart:utf

dart:web

intl

unittest