Parsing graffles with Python


#1

I have happily used OG (OmniGraffle) for donkey's years, and have occasionally inspected what looks like straight XML with a text editor like BBEdit.
I was trying to use an OG file as a quick source of data for a Python line intersection algorithm but ran into a problem.
When I try to read the file (as I would with an XML/txt/HTML file) in order to parse it, I get

“UnicodeDecodeError: ‘utf-8’ codec can’t decode byte 0x8b in position 1: invalid start byte”.

Now the file looks like standard XML, and indeed the same file, when exported as a .vdx file, is read fine.
Anyone read .graffle files with Python?
Do graffle files use a different codec?


#2

You will, of course, need to:

  • Check whether the particular .graffle is a flat file or a package (containing for example imported graphic files as well as a preview file and a data.plist file)
  • Un(g)zip either:
    • the whole flat .graffle file,
    • or just the data.plist (which contains the XML text) inside the package.

#3

The interface of the Python plistlib differs between Python 2.7 and Python 3, but using the 2.7 version, I find that I can parse gunzipped graffle XML (from flat files and packages to Python objects) as in the snippet below:

(Which you should be able to prune back quite a lot, if you want – I tend to paste from library code without pruning)

'''Parse OmniGraffle XML'''

from os.path import expanduser, isdir, isfile
import gzip
import binascii
import plistlib


# main :: IO ()
def main():
    '''Print the list of Maybe-wrapped parse results for
       two sample paths on the Desktop (flat.graffle and
       package.graffle).
    '''
    samplePaths = [
        '~/Desktop/flat.graffle',
        '~/Desktop/package.graffle'
    ]
    results = []
    for fp in samplePaths:
        results.append(maybeGraffleParse(fp))
    print(results)

# --------------------OMNIGRAFFLE FILES---------------------

def maybeGraffleParse(fp):
    '''Just the parsed plist found at the path fp, else Nothing.

       fp may name either a flat .graffle file, or a .graffle
       package (a directory containing a data.plist file).
       Nothing is returned if neither is found, if the file
       found is not gzipped, or if the unzipped text is not
       well-formed XML.

       Uses plistlib.readPlistFromString (the Python 2 API).
    '''
    # Local import: only needed to recognise XML parse failures.
    from xml.parsers.expat import ExpatError

    def parse(strXML):
        # Ill-formed XML is reported by the underlying Expat
        # parser. The contract here is 'else Nothing', so the
        # failure is mapped to Nothing rather than propagated.
        try:
            return Just(plistlib.readPlistFromString(strXML))
        except ExpatError:
            return Nothing()

    fpPlist = fp + '/data.plist'
    return bindMay(
        bindMay(
            # A flat file is read directly; a package is read
            # through the data.plist which it contains.
            Just(fp) if doesFileExist(fp) else (
                Just(fpPlist) if doesDirectoryExist(fp) and (
                    doesFileExist(fpPlist)
                ) else Nothing()
            )
        )(maybeUnGZipped)
    )(parse)


def maybeUnGZipped(fp):
    '''Just the decompressed contents of the file at fp
       (which is assumed to exist), if that file starts
       with the two gzip signature bytes.
       Otherwise, Nothing.
    '''
    fpFull = expanduser(fp)
    # Peek at the first two bytes only, to test for gzip.
    with open(fpFull, 'rb') as fh:
        signature = fh.read(2)
    if signature != b'\x1f\x8b':
        return Nothing()
    with gzip.open(fpFull, 'rb') as gz:
        return Just(gz.read())
    

# -------------------------GENERIC-------------------------

# Just :: a -> Maybe a
def Just(x):
    '''An inhabited Maybe (option type) value:
       a dictionary which wraps the result x
       of a successful computation.
    '''
    return dict(type='Maybe', Nothing=False, Just=x)


# Nothing :: Maybe a
def Nothing():
    '''An empty Maybe (option type) value:
       a dictionary returned where no result
       could be computed.
    '''
    return dict(type='Maybe', Nothing=True)


# bindMay (>>=) :: Maybe a -> (a -> Maybe b) -> Maybe b
def bindMay(m):
    '''A function which applies a given (a -> Maybe b)
       function (mf) to the contents of the Maybe value m.
       Where m is Nothing, mf is not called, and m itself
       is returned unchanged.
       Where m is Just(x), the result is mf(x).
       This allows a sequence of (a -> Maybe b) functions
       to be chained.
    '''
    def go(mf):
        if m.get('Nothing'):
            return m
        return mf(m.get('Just'))
    return go

# from os.path import (expanduser, isdir, isfile)
# doesFileExist :: FilePath -> IO Bool
def doesFileExist(fp):
    '''True if, after expansion of any tilde,
       the filePath fp leads to a file.
    '''
    fpFull = expanduser(fp)
    return isfile(fpFull)


# from os.path import (expanduser, isdir, isfile)
# doesDirectoryExist :: FilePath -> IO Bool
def doesDirectoryExist(fp):
    '''True if, after expansion of any tilde,
       the filePath fp leads to a directory.
    '''
    fpFull = expanduser(fp)
    return isdir(fpFull)


# MAIN ---
# Run main() only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

#4

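PS, if you are using Python 3, then use plistlib.readPlistFromBytes or plistlib.loads instead (prefer plistlib.loads: readPlistFromBytes was deprecated in Python 3.4 and removed in 3.9), so the maybeGraffleParse function might, in context, look something like: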

'''Parse OmniGraffle XML (Python 3)'''

from os.path import expanduser, isdir, isfile
from functools import reduce
import binascii
import plistlib
import gzip


# main :: IO ()
def main():
    '''Print the list of Maybe-wrapped parse results for
       two sample paths on the Desktop (flat.graffle and
       package.graffle).
    '''
    samplePaths = (
        '~/Desktop/flat.graffle',
        '~/Desktop/package.graffle'
    )
    print(list(map(maybeGraffleParse, samplePaths)))

# --------------------OMNIGRAFFLE FILES---------------------

# maybeGraffleParse :: FilePath -> Maybe Dict
def maybeGraffleParse(fp):
    '''Just the parsed plist found at the path fp, else Nothing.

       fp may name either a flat .graffle file, or a .graffle
       package (a directory containing a data.plist file).
       Nothing is returned if neither is found, if the file
       found is not gzipped, or if the unzipped bytes can not
       be parsed as a plist.
    '''
    # Local import: ill-formed XML surfaces as an Expat error
    # rather than as plistlib.InvalidFileException.
    from xml.parsers.expat import ExpatError

    def parse(xmlPlist):
        # Both failure modes map to Nothing, so that the
        # 'else Nothing' contract holds for malformed XML too.
        try:
            return Just(
                plistlib.loads(
                    xmlPlist
                )
            )
        except (plistlib.InvalidFileException, ExpatError):
            return Nothing()

    fpPlist = fp + '/data.plist'
    return bindMay(
        bindMay(
            # A flat file is read directly; a package is read
            # through the data.plist which it contains.
            Just(fp) if doesFileExist(fp) else (
                Just(fpPlist) if doesDirectoryExist(fp) and (
                    doesFileExist(fpPlist)
                ) else Nothing()
            )
        )(maybeUnGZipped)
    )(parse)


def maybeUnGZipped(fp):
    '''Just the decompressed contents of the file at fp
       (which is assumed to exist), if that file starts
       with the two gzip signature bytes.
       Otherwise, Nothing.
    '''
    fpFull = expanduser(fp)
    # Peek at the first two bytes only, to test for gzip.
    with open(fpFull, 'rb') as probe:
        isGzipped = probe.read(2) == b'\x1f\x8b'
    if not isGzipped:
        return Nothing()
    with gzip.open(fpFull, 'rb') as gz:
        return Just(gz.read())


# -------------------------GENERIC-------------------------

# Just :: a -> Maybe a
def Just(x):
    '''An inhabited Maybe (option type) value:
       a dictionary which wraps the result x
       of a successful computation.
    '''
    return dict(type='Maybe', Nothing=False, Just=x)


# Nothing :: Maybe a
def Nothing():
    '''An empty Maybe (option type) value:
       a dictionary returned where no result
       could be computed.
    '''
    return dict(type='Maybe', Nothing=True)


# bindMay (>>=) :: Maybe a -> (a -> Maybe b) -> Maybe b
def bindMay(m):
    '''A function which applies a given (a -> Maybe b)
       function (mf) to the contents of the Maybe value m.
       Where m is Nothing, mf is not called, and m itself
       is returned unchanged.
       Where m is Just(x), the result is mf(x).
       This allows a sequence of (a -> Maybe b) functions
       to be chained.
    '''
    def go(mf):
        if m.get('Nothing'):
            return m
        return mf(m.get('Just'))
    return go

# compose :: ((a -> a), ...) -> (a -> a)
def compose(*fs):
    '''A single function equivalent to applying each of
       the functions in fs in turn, from the rightmost
       (applied first) to the leftmost (applied last).
       With no functions given, the result returns its
       argument unchanged.
    '''
    def composed(x):
        acc = x
        for f in reversed(fs):
            acc = f(acc)
        return acc
    return composed

# from os.path import (expanduser, isdir, isfile)
# doesFileExist :: FilePath -> IO Bool
def doesFileExist(fp):
    '''True if, after expansion of any tilde,
       the filePath fp leads to a file.
    '''
    fpFull = expanduser(fp)
    return isfile(fpFull)


# from os.path import (expanduser, isdir, isfile)
# doesDirectoryExist :: FilePath -> IO Bool
def doesDirectoryExist(fp):
    '''True if, after expansion of any tilde,
       the filePath fp leads to a directory.
    '''
    fpFull = expanduser(fp)
    return isdir(fpFull)


# MAIN ---
# Run main() only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

#5

Thanks, draft8, this does more than I was expecting! I’ll unpick it sometime, but this meets immediate needs nicely. Ta again!