Source code for edeposit.amqp.calibre.calibre

#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Interpreter version: python 2.7
#
# Imports =====================================================================
"""
Lowlevel conversion API for calibre's ``ebook-convert``.
"""
import os
import gc
from base64 import b64encode, b64decode
from tempfile import NamedTemporaryFile as NTFile

import sh

from structures import INPUT_FORMATS, OUTPUT_FORMATS, ConversionResponse


# Functions & objects =========================================================
def _wrap(text, columns=80):
    """
    Own "dumb" reimplementation of textwrap.wrap().

    This is because calling .wrap() on bigger strings can take a LOT of
    processor power. And I mean like 8 seconds of 3GHz CPU just to wrap 20kB of
    text without spaces.

    Args:
        text (str): Text to wrap.
        columns (int): Wrap after `columns` characters.

    Returns:
        str: Wrapped text.
    """
    out = []
    for cnt, char in enumerate(text):
        out.append(char)

        if (cnt + 1) % columns == 0:
            out.append("\n")

    return "".join(out)


def convert(input_format, output_format, b64_data):
    """
    Convert `b64_data` from `input_format` to `output_format`.

    The decoded data are stored in a temporary file in ``/tmp``, converted by
    calibre's ``ebook-convert`` and the result is read back and base64
    encoded. The temporary output file is removed even when the conversion
    fails.

    Args:
        input_format (str):  Specification of input format (pdf/epub/whatever),
                             see :attr:`INPUT_FORMATS` for list.
        output_format (str): Specification of output format (pdf/epub/..),
                             see :attr:`OUTPUT_FORMATS` for list.
        b64_data (str):      Base64 encoded data.

    Returns:
        ConversionResponse: `namedtuple` structure with information about \
                            output ``format``, data (``b64_data``) and \
                            ``protocol`` from conversion. Structure is \
                            defined in :class:`.ConversionResponse`.

    Raises:
        AssertionError: When bad arguments are handed over.
        UserWarning: When conversion failed.
    """
    # checks - raised explicitly, so they are not stripped by ``python -O``
    if input_format not in INPUT_FORMATS:
        raise AssertionError("Unsupported input format!")
    if output_format not in OUTPUT_FORMATS:
        raise AssertionError("Unsupported output format!")

    with NTFile(mode="wb", suffix="." + input_format, dir="/tmp") as ifile:
        ofilename = ifile.name + "." + output_format

        # save received data to the temporary file
        ifile.write(b64decode(b64_data))
        ifile.flush()

        # free memory from base64 data
        b64_data = None
        gc.collect()

        try:
            # convert file; output of the converter is captured into another
            # temporary file and used as the conversion protocol
            try:
                with NTFile(mode="wb", suffix=".stdout", dir="/tmp") as stdout:
                    sh.ebook_convert(ifile.name, ofilename, _out=stdout).wait()
                    stdout.flush()

                    # has to be read before `stdout` is closed (and deleted)
                    with open(stdout.name) as protocol_file:
                        protocol = protocol_file.read()
            except sh.ErrorReturnCode_1 as e:
                raise UserWarning(
                    "Conversion failed:\n" +
                    e.message.encode("utf-8", errors='ignore')
                )

            # the protocol has to contain the success message
            if output_format.upper() + " output written to" not in protocol:
                raise UserWarning("Conversion failed:\n" + protocol)

            # read the data from the converted file
            with open(ofilename, "rb") as ofile:
                output_data = _wrap(
                    b64encode(ofile.read())
                )

            gc.collect()  # we have experienced unpleasant memory spikes
        finally:
            # remove temporary output file, also after a failed conversion;
            # the file may not exist if the converter did not create it
            if os.path.exists(ofilename):
                os.remove(ofilename)

    return ConversionResponse(
        format=output_format,
        b64_data=output_data,
        protocol=protocol
    )