MacPython Logo from __future__ import *

Kailash and Friends Kailash Kher Kaipa

online mp3 Anoice albums buy Amund Maarud albums online Asia online CD Andy M. Stewart buy tracks Axis online Astral Rising A Beautiful Machine download CD Aereda buy tracks Aksent online tracks Absidia Atrium Carceri A Beautiful Machine Absolum buy CD Aryan Wind and Brumalis and Valhalla Saints online music Atomsmasher download albums AK1200 download music Angelzoom online CD Arturo Mantovani and his Orchestra buy music 16 buy tracks Ashtorath online CD Aimee Mann buy music Anael And Bradfield buy mp3 Autumnblaze download mp3 Aggrolites download CD Arj Snoek buy albums Ada buy CD Aalto Andy With Rama West A Beautiful Machine Absolum online tracks Asura albums online Albert Lee 4 Non Blondes A Beautiful Machine Absolum download albums Andrew Lloyd Webber and Ar Rahman online music African Head Charge download mp3 Amber Asylum online music Analena online music ANTIX feat ROB SALMON A.R. Rahman A Beautiful Machine Absolum online tracks African Blackwood buy mp3 Axis buy mp3 Alan Menken buy music Amoebic Dysentery buy Alph Secakuku A Beautiful Machine albums download Albita online Amparo Ochoa A Beautiful Machine download tracks Andy Partridge and Harold Budd download tracks Anubian Lights Alient Project A Beautiful Machine Absolum buy albums Antonio Forcione download CD Ali G Indahouse online mp3 Art and Jazz Messengers Blakey download Arab Strap A Beautiful Machine online albums Adema buy Agua de Annique A Beautiful Machine buy CD Avalanches download tracks Acroma Andi Deris A Beautiful Machine Absolum download tracks American Steel download albums Amanda Perez online 999 A Beautiful Machine download mp3 Arild Andersen download CD American Steel buy tracks Absolute Beginner download tracks Anubi online albums Ancient Wisdom online A Verse Unsung A Beautiful Machine buy music Aghast Andromeda Island A Beautiful Machine Absolum download Arlo Guthrie A Beautiful Machine online mp3 Aavepyora online albums Achillea buy Andrew Bird A Beautiful Machine buy music Alexey 
Aigui and Ensemble 4'33'' albums buy Abbey Lincoln and Archie Shepp download albums Archive download CD A Guy Called Gerald feat. D.S. download music Al Di Meola online music Abigail download music Angel Witch online music Adelaide

2004-05-18

Monkeypatching readline support into Python’s UTF-16 codecs

Filed under: python — bob @ 9:23 pm

[ utf16reader.py ]

# Empty export list: ``from <this module> import *`` brings in no names.
__all__ = []

# Largest size argument passed to ``self.read()`` per call when the readline
# helpers below need more decoded data.
BUFFER_SIZE = 256

def readline_unsized(self, buff):
    """Pull one complete line out of ``buff``, reading more data as needed.

    ``self`` only needs a ``read(size)`` method. ``buff`` is the text left
    over from earlier calls.

    Returns a ``(leftover, line)`` tuple: ``line`` keeps its line ending and
    ``leftover`` is whatever text follows it. A line is only handed back once
    a second line has started in the buffer (or the reader is exhausted), so
    a terminator that is split across two reads is never cut in half.
    """
    pending = buff
    while True:
        pieces = pending.splitlines(True)
        if len(pieces) >= 2:
            first, rest = pieces[0], pieces[1:]
            return (u''.join(rest), first)
        more = self.read(BUFFER_SIZE)
        if more:
            pending += more
            continue
        # Reader is exhausted: whatever is buffered is the final line.
        return (u'', pending)
    
def readline_sized(self, buff, size):
    """Pull at most ``size`` characters of the next line out of ``buff``.

    ``self`` only needs a ``read(size)`` method. ``buff`` is the text left
    over from earlier calls; more data is read only while the buffer holds
    fewer than ``size`` characters and no second line has started.

    Returns a ``(leftover, line)`` tuple. ``line`` ends at the first line
    ending or after ``size`` characters, whichever comes first; ``leftover``
    is the rest of the buffered text.
    """
    pending = buff
    while True:
        pieces = pending.splitlines(True)
        if len(pieces) >= 2:
            first = pieces[0]
            if len(first) > size:
                # Line is longer than requested: keep the overflow at the
                # front of the leftover text.
                pieces[0] = first[size:]
                first = first[:size]
            else:
                del pieces[0]
            return (u''.join(pieces), first)
        have = len(pending)
        if size > have:
            more = self.read(min(BUFFER_SIZE, size - have))
            if more:
                pending += more
                continue
            # Reader is exhausted: return what is buffered.
            return (u'', pending)
        # Enough characters are buffered and no line ending precedes the
        # limit, so cut exactly at ``size``.
        return (pending[size:], pending[:size])

def readline(self, size=None, keepends=True):
    """Replacement ``readline`` method for the UTF-16 ``StreamReader`` classes.

    Text that was read but not yet returned is kept between calls in
    ``self._utf16_readline_buffer``.

    Parameters:
        size: maximum number of characters to return. ``None`` or a negative
            value means "no limit" (negative sizes used to be passed to the
            sized helper, where negative slicing chopped the line ending off
            and pushed it back into the buffer).
        keepends: when false, the trailing line ending is stripped from the
            returned line. Defaults to True, which is the original behaviour.

    Returns the next line, or an empty string once the reader is exhausted.
    """
    buff = self._utf16_readline_buffer
    if size is None or size < 0:
        buff, rval = readline_unsized(self, buff)
    else:
        buff, rval = readline_sized(self, buff, size)
    self._utf16_readline_buffer = buff
    if not keepends:
        # ``rval`` holds at most one line; splitlines(False) drops its
        # terminator. An empty ``rval`` splits to [] and is returned as is.
        stripped = rval.splitlines(False)
        if stripped:
            rval = stripped[0]
    return rval

def install():
    """Monkeypatch the three UTF-16 codec modules with this file's readline.

    For each of ``encodings.utf_16``, ``utf_16_be`` and ``utf_16_le`` the
    module's ``StreamReader`` class gets ``readline`` as its ``readline``
    method plus an empty class-level ``_utf16_readline_buffer`` default.
    """
    from encodings import utf_16, utf_16_be, utf_16_le
    for codec_module in (utf_16, utf_16_be, utf_16_le):
        reader_cls = codec_module.StreamReader
        reader_cls._utf16_readline_buffer = u''
        reader_cls.readline = readline

def test():
    """Self-test for the patched UTF-16 ``readline``.

    For each of the three UTF-16 codec names, a fixed list of
    newline-terminated strings is encoded and read back through
    ``codecs.getreader(codec)``, checking:

    * iterating the reader yields the original strings,
    * ``readlines()`` returns the original list,
    * ``readline(5)`` never returns more than 5 characters and, piece by
      piece, reproduces the original text.

    Raises AssertionError on the first mismatch. NOTE(review): the checks
    appear to rely on the codecs having been patched (the ``__main__``
    block calls ``install()`` first) -- confirm before running stand-alone.
    """
    from StringIO import StringIO
    import codecs
    from itertools import izip
    # Every entry must end with a line terminator; otherwise the joined text
    # is one single line and the per-line comparisons below cannot hold.
    STRINGS = [
        u'\u304a\u3084\u3059\u307f\u306a\u3055\u3044\n',
        u'Oysasumi nasai\n',
        u'Goodnight\n',
    ] * 500
    # The decoded text is the same for every codec, so build it once.
    utxt = u''.join(STRINGS)
    for codec in ('utf_16', 'utf_16_le', 'utf_16_be'):
        txt = utxt.encode(codec)

        def testreader():
            # Fresh reader over the encoded bytes for each sub-test.
            return codecs.getreader(codec)(StringIO(txt))

        # test readline()
        for new, orig in izip(testreader(), STRINGS):
            assert new == orig, '%r != %r' % (new, orig,)
        # test readlines()
        assert testreader().readlines() == STRINGS
        # test sized readline()
        idx = 0
        rdr = testreader()
        while idx < len(utxt):
            nextline = rdr.readline(5)
            # An empty result here means the reader ran dry early; fail
            # clearly instead of hitting IndexError on splitlines()[0].
            assert nextline, 'unexpected end of data at offset %d' % (idx,)
            assert len(nextline) <= 5, 'len(%r) > 5' % (nextline,)
            if nextline.splitlines()[0] != nextline:
                # there was a newline
                nextchunk = utxt[idx:idx+len(nextline)]
                idx += len(nextline)
                assert nextchunk == nextline, '[a] %r != %r' % (nextline, nextchunk)
            else:
                # no newline: the reader must have returned a full 5 chars
                nextchunk = utxt[idx:idx+5]
                idx += 5
                assert nextline == nextchunk, '[b] %r != %r' % (nextline, nextchunk)

if __name__ == '__main__':
    # Script entry point: patch the codecs, run the self-test, and on failure
    # print the traceback and open the post-mortem debugger at the failure.
    install()
    try:
        test()
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # SystemExit are not swallowed into a debugger session on
        # interpreters where they do not derive from Exception.
        import sys
        import pdb
        import traceback
        tb = sys.exc_info()[2]
        traceback.print_exc()
        pdb.post_mortem(tb)

(revised based on comments from MA Lemburg)

No Comments »

No comments yet.

RSS feed for comments on this post. TrackBack URI

Leave a comment

WP-Hashcash: protecting you from spam.

Powered by WordPress