included urllib2_file.py in the package to ease installation

This commit is contained in:
Felix Lohmeier 2017-03-14 22:04:06 +01:00
parent 6f8badae6a
commit bf91e918df
8 changed files with 394 additions and 11 deletions

View File

@ -5,8 +5,8 @@ OpenRefine Python Client Library
The OpenRefine Python Client Library provides an interface to
communicating with an `OpenRefine <http://openrefine.org/>`_ server.
If you are looking for a ready to use command line interface to OpenRefine then you might be interested in the docker variation of this library:
`felixlohmeier/openrefine-client <https://hub.docker.com/r/felixlohmeier/openrefine-client/>`_. You will find examples for batch processing (e.g. for usage in shell scripts) there.
If you are looking for a ready to use command line interface to OpenRefine for batch processing then you might be interested in the following bash shell script:
`felixlohmeier/openrefine-batch <https://github.com/felixlohmeier/openrefine-batch>`_
If you are familiar with python and want to go into more depth, then read on!
@ -71,12 +71,6 @@ Installation
(Someone with more familiarity with python's byzantine collection of installation
frameworks is very welcome to improve/"best practice" all this.)
#. Install dependencies, which currently is ``urllib2_file``:
``sudo pip install -r requirements.txt``
(If you don't have ``pip`` visit `pip-installer.org <http://www.pip-installer.org/en/latest/installing.html#install-or-upgrade-pip>`_)
#. Ensure you have a Refine server running somewhere and, if necessary, set
the environment vars as above.

View File

@ -26,7 +26,7 @@ import re
import StringIO
import time
import urllib
import urllib2_file
from google.urllib2_file import urllib2_file
import urllib2
import urlparse

View File

@ -0,0 +1,50 @@
Copyright (C) 2004,2005,2006,2008,2009,2010 Fabien SEISEN
PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
--------------------------------------------
1. This LICENSE AGREEMENT is between the Python Software Foundation
("PSF"), and the Individual or Organization ("Licensee") accessing and
otherwise using this software ("Python") in source or binary form and
its associated documentation.
2. Subject to the terms and conditions of this License Agreement, PSF
hereby grants Licensee a nonexclusive, royalty-free, world-wide
license to reproduce, analyze, test, perform and/or display publicly,
prepare derivative works, distribute, and otherwise use Python
alone or in any derivative version, provided, however, that PSF's
License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation; All Rights
Reserved" are retained in Python alone or in any derivative version
prepared by Licensee.
3. In the event Licensee prepares a derivative work that is based on
or incorporates Python or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to Python.
4. PSF is making Python available to Licensee on an "AS IS"
basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.
5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.
7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between PSF and
Licensee. This License Agreement does not grant permission to use PSF
trademarks or trade name in a trademark sense to endorse or promote
products or services of Licensee, or any third party.
8. By copying, installing or otherwise using Python, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.

View File

@ -0,0 +1,32 @@
Python urllib2_file.py enable you to upload files using HTTP multipart/form-data
Install:
python setup.py test
python setup.py build
python setup.py install
Example:
import urllib2_files
import urllib2
data = { 'foo': 'bar',
'form_name': open("/lib/libc.so.1") }
(send something like: 'Content-Disposition: form-data; name="form_name"; filename="form_name";' )
Or if you want to specify a different filename:
data = {'foo': 'bar',
'form_name': {'fd': open('/lib/libresolv.so.2',
'filename': 'libresolv.so'} }
(send something like: 'Content-Disposition: form-data; name="form_name"; filename="libresolv.so";' )
u = urllib2.urlopen('http://site.com/path/upload.php', data)
Tested with:
python 2.3
python 2.4
python 2.5

View File

View File

@ -0,0 +1,308 @@
#!/usr/bin/env python
# Copyright (C) 2004,2005,2006,2008,2009,2010 Fabien SEISEN
#
# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
# --------------------------------------------
#
# 1. This LICENSE AGREEMENT is between the Python Software Foundation
# ("PSF"), and the Individual or Organization ("Licensee") accessing and
# otherwise using this software ("Python") in source or binary form and
# its associated documentation.
#
# 2. Subject to the terms and conditions of this License Agreement, PSF
# hereby grants Licensee a nonexclusive, royalty-free, world-wide
# license to reproduce, analyze, test, perform and/or display publicly,
# prepare derivative works, distribute, and otherwise use Python
# alone or in any derivative version, provided, however, that PSF's
# License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
# 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation; All Rights
# Reserved" are retained in Python alone or in any derivative version
# prepared by Licensee.
#
# 3. In the event Licensee prepares a derivative work that is based on
# or incorporates Python or any part thereof, and wants to make
# the derivative work available to others as provided herein, then
# Licensee hereby agrees to include in any such work a brief summary of
# the changes made to Python.
#
# 4. PSF is making Python available to Licensee on an "AS IS"
# basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
# IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
# INFRINGE ANY THIRD PARTY RIGHTS.
#
# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
# FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
# A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
# OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
#
# 6. This License Agreement will automatically terminate upon a material
# breach of its terms and conditions.
#
# 7. Nothing in this License Agreement shall be deemed to create any
# relationship of agency, partnership, or joint venture between PSF and
# Licensee. This License Agreement does not grant permission to use PSF
# trademarks or trade name in a trademark sense to endorse or promote
# products or services of Licensee, or any third party.
#
# 8. By copying, installing or otherwise using Python, Licensee
# agrees to be bound by the terms and conditions of this License
# Agreement.
#
"""
extend urllib2 to enable uploading files using multipart/form-data
I was looking for something to make me able to upload files to my photo web site (http://gallery.menalto.com/).
Inspired by http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306
Example:
import urllib2_file
import urllib2
data = { 'foo': 'bar',
'form_name': open("/lib/libc.so.1")
}
(send something like: 'Content-Disposition: form-data; name="form_name"; filename="form_name";' )
Or if you want to specify a different filename:
data = { 'foo': 'bar',
'form_name': { 'fd': open('/lib/libresolv.so.2',
'filename': 'libresolv.so'}
}
(send something like: 'Content-Disposition: form-data; name="form_name"; filename="libresolv.so";' )
u = urllib2.urlopen('http://site.com/path/upload.php', data)
THANKS to:
- bug fix: kosh @T aesaeion.com
- HTTPS support : Ryan Grow <ryangrow @T yahoo.com>
- upload is now done with chunks (Adam Ambrose)
- UTF-8 filenames are now allowed (Eli Golovinsky)
- File object is no more mandatory, Object only needs to have seek() read() attributes (Eli Golovinsky)
- StringIO workaround (Laurent Coustet), does not work with cStringIO
Also modified by Adam Ambrose (aambrose @T pacbell.net) to write data in
chunks (hardcoded to CHUNK_SIZE for now), so the entire contents of the file
don't need to be kept in memory.
"""
__author__ = 'Fabien SEISEN'
__license__ = 'Python Software Foundation License version 2'
__url__ = 'http://fabien.seisen.org/python/'
import httplib
import mimetools
import mimetypes
import os
import os.path
import socket
import stat
import sys
import urllib
import urllib2
CHUNK_SIZE = 65536
def get_content_type(filename):
return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
# if sock is None, return the estimate size
def send_data(v_vars, v_files, boundary, sock=None):
"""Parse v_vars, v_files and create a buffer with HTTP multipart/form-data
if sock is set, send data to it
v_vars = {"key": "value"}
v_files = {"filename" : open("path/to/file"}
"""
buffer_len = 0
for (k, v) in v_vars:
buffer=''
buffer += '--%s\r\n' % boundary
buffer += 'Content-Disposition: form-data; name="%s"\r\n' % k
buffer += '\r\n'
buffer += v + '\r\n'
if sock:
sock.send(buffer)
buffer_len += len(buffer)
for (k, v) in v_files:
name = k
filename = k
if isinstance(v, dict):
if v.has_key('fd'):
fd = v['fd']
else:
raise TypeError("if value is dict, it must have keys 'fd' and 'filename'")
if v.has_key('filename'):
filename = v['filename']
else:
raise TypeError("if value is dict, it must have keys 'fd' and 'filename'")
else:
fd = v
if not hasattr(fd, 'seek'):
raise TypeError("file descriptor MUST have seek attribute")
if not hasattr(fd, 'read'):
raise TypeError("file descriptor MUST have read attribute")
fd.seek(0)
if hasattr(fd, 'fileno'):
# a File
file_size = os.fstat(fd.fileno())[stat.ST_SIZE]
else:
# Final resort, read the entire message, and figure out the size
file_size = 0
while True:
chunk = fd.read(CHUNK_SIZE)
if chunk:
# It's not necessarily going to be CHUNK_SIZE large, since
# the last chunk is very likely < CHUNK_SIZE
file_size += len(chunk)
else:
break
fd.seek(0)
if isinstance(filename, unicode):
filename = filename.encode('UTF-8')
buffer = ''
buffer += '--%s\r\n' % boundary
buffer += 'Content-Disposition: form-data; name="%s"; filename="%s";\r\n' \
% (name, filename)
buffer += 'Content-Type: %s\r\n' % get_content_type(filename)
buffer += 'Content-Length: %s\r\n' % file_size
buffer += '\r\n'
buffer_len += len(buffer)
if sock:
sock.send(buffer)
if hasattr(fd, 'seek'):
fd.seek(0)
# read file only of sock is defined
if sock:
while True:
chunk = fd.read(CHUNK_SIZE)
if not chunk:
break
if sock:
sock.send(chunk)
buffer_len += file_size
buffer = '\r\n'
buffer += '--%s--\r\n' % boundary
buffer += '\r\n'
if sock:
sock.send(buffer)
buffer_len += len(buffer)
return buffer_len
# mainly a copy of HTTPHandler from urllib2
class newHTTPHandler(urllib2.BaseHandler):
def http_open(self, req):
return self.do_open(httplib.HTTP, req)
def do_open(self, http_class, req):
data = req.get_data()
v_files = []
v_vars = []
# mapping object (dict)
if req.has_data() and type(data) != str:
if hasattr(data, 'items'):
data = data.items()
else:
try:
if len(data) and not isinstance(data[0], tuple):
raise TypeError
except TypeError:
ty, va, tb = sys.exc_info()
raise TypeError, "not a valid non-string sequence or mapping object", tb
for (k, v) in data:
# if fd is provided with a filename
if isinstance(v, dict):
if not v.has_key('fd'):
raise TypeError("if value is dict, it must have keys 'fd' and 'filename")
if not v.has_key('filename'):
raise TypeError("if value is dict, it must have keys 'fd' and 'filename")
v_files.append( (k, v) )
elif hasattr(v, 'read'):
v_files.append( (k, v) )
else:
v_vars.append( (k, v) )
# no file ? convert to string
if len(v_vars) > 0 and len(v_files) == 0:
data = urllib.urlencode(v_vars)
v_files = []
v_vars = []
host = req.get_host()
if not host:
raise urllib2.URLError('no host given')
h = http_class(host) # will parse host:port
if req.has_data():
h.putrequest(req.get_method(), req.get_selector())
if not 'Content-type' in req.headers:
if len(v_files) > 0:
boundary = mimetools.choose_boundary()
l = send_data(v_vars, v_files, boundary)
h.putheader('Content-Type',
'multipart/form-data; boundary=%s' % boundary)
h.putheader('Content-length', str(l))
else:
h.putheader('Content-type',
'application/x-www-form-urlencoded')
if not 'Content-length' in req.headers:
h.putheader('Content-length', '%d' % len(data))
else:
h.putrequest(req.get_method(), req.get_selector())
scheme, sel = urllib.splittype(req.get_selector())
sel_host, sel_path = urllib.splithost(sel)
h.putheader('Host', sel_host or host)
for name, value in self.parent.addheaders:
name = name.capitalize()
if name not in req.headers:
h.putheader(name, value)
for k, v in req.headers.items():
h.putheader(k, v)
# httplib will attempt to connect() here. be prepared
# to convert a socket error to a URLError.
try:
h.endheaders()
except socket.error, err:
raise urllib2.URLError(err)
if req.has_data():
if len(v_files) > 0:
l = send_data(v_vars, v_files, boundary, h)
elif len(v_vars) > 0:
# if data is passed as dict ...
data = urllib.urlencode(v_vars)
h.send(data)
else:
# "normal" urllib2.urlopen()
h.send(data)
code, msg, hdrs = h.getreply()
fp = h.getfile()
if code == 200:
resp = urllib.addinfourl(fp, hdrs, req.get_full_url())
resp.code = code
resp.msg = msg
return resp
else:
return self.parent.error('http', req, fp, code, msg, hdrs)
urllib2._old_HTTPHandler = urllib2.HTTPHandler
urllib2.HTTPHandler = newHTTPHandler
class newHTTPSHandler(newHTTPHandler):
def https_open(self, req):
return self.do_open(httplib.HTTPS, req)
urllib2.HTTPSHandler = newHTTPSHandler

View File

@ -26,7 +26,7 @@ import time
from google.refine import refine
import urllib2_file
from google.urllib2_file import urllib2_file
import urllib2
import urlparse

View File

@ -1 +0,0 @@
urllib2_file>=0.2.1