diff --git a/README.rst b/README.rst
index fb0d36f..bc17770 100644
--- a/README.rst
+++ b/README.rst
@@ -5,8 +5,8 @@ OpenRefine Python Client Library
The OpenRefine Python Client Library provides an interface to
communicating with an `OpenRefine `_ server.
-If you are looking for a ready to use command line interface to OpenRefine for batch processing then you might be interested in the following bash shell script:
-`felixlohmeier/openrefine-batch `_
+If you are looking for a ready-to-use command-line interface to OpenRefine, then you might be interested in the Docker variant of this library:
+`felixlohmeier/openrefine-client `_. There you will find examples for batch processing (e.g. for use in shell scripts).
If you are familiar with python and want to go into more depth, then read on!
@@ -71,6 +71,12 @@ Installation
(Someone with more familiarity with python's byzantine collection of installation
frameworks is very welcome to improve/"best practice" all this.)
+#. Install the dependencies (currently only ``urllib2_file``):
+
+ ``sudo pip install -r requirements.txt``
+
+ (If you don't have ``pip``, visit `pip-installer.org `_)
+
#. Ensure you have a Refine server running somewhere and, if necessary, set
the environment vars as above.
diff --git a/google/refine/refine.py b/google/refine/refine.py
index 0d19160..c7c9b91 100644
--- a/google/refine/refine.py
+++ b/google/refine/refine.py
@@ -26,7 +26,7 @@ import re
import StringIO
import time
import urllib
-from google.urllib2_file import urllib2_file
+import urllib2_file
import urllib2
import urlparse
diff --git a/google/urllib2_file/LICENSE b/google/urllib2_file/LICENSE
deleted file mode 100644
index 797327d..0000000
--- a/google/urllib2_file/LICENSE
+++ /dev/null
@@ -1,50 +0,0 @@
-Copyright (C) 2004,2005,2006,2008,2009,2010 Fabien SEISEN
-
-PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
---------------------------------------------
-
-1. This LICENSE AGREEMENT is between the Python Software Foundation
-("PSF"), and the Individual or Organization ("Licensee") accessing and
-otherwise using this software ("Python") in source or binary form and
-its associated documentation.
-
-2. Subject to the terms and conditions of this License Agreement, PSF
-hereby grants Licensee a nonexclusive, royalty-free, world-wide
-license to reproduce, analyze, test, perform and/or display publicly,
-prepare derivative works, distribute, and otherwise use Python
-alone or in any derivative version, provided, however, that PSF's
-License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
-2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation; All Rights
-Reserved" are retained in Python alone or in any derivative version
-prepared by Licensee.
-
-3. In the event Licensee prepares a derivative work that is based on
-or incorporates Python or any part thereof, and wants to make
-the derivative work available to others as provided herein, then
-Licensee hereby agrees to include in any such work a brief summary of
-the changes made to Python.
-
-4. PSF is making Python available to Licensee on an "AS IS"
-basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
-IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
-DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
-FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
-INFRINGE ANY THIRD PARTY RIGHTS.
-
-5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
-FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
-A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
-OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
-
-6. This License Agreement will automatically terminate upon a material
-breach of its terms and conditions.
-
-7. Nothing in this License Agreement shall be deemed to create any
-relationship of agency, partnership, or joint venture between PSF and
-Licensee. This License Agreement does not grant permission to use PSF
-trademarks or trade name in a trademark sense to endorse or promote
-products or services of Licensee, or any third party.
-
-8. By copying, installing or otherwise using Python, Licensee
-agrees to be bound by the terms and conditions of this License
-Agreement.
diff --git a/google/urllib2_file/README b/google/urllib2_file/README
deleted file mode 100644
index 5d1b61d..0000000
--- a/google/urllib2_file/README
+++ /dev/null
@@ -1,32 +0,0 @@
-Python urllib2_file.py enable you to upload files using HTTP multipart/form-data
-
-Install:
-
-python setup.py test
-python setup.py build
-python setup.py install
-
-Example:
-
-import urllib2_files
-import urllib2
-
-data = { 'foo': 'bar',
- 'form_name': open("/lib/libc.so.1") }
-
-(send something like: 'Content-Disposition: form-data; name="form_name"; filename="form_name";' )
-
-Or if you want to specify a different filename:
-
-data = {'foo': 'bar',
- 'form_name': {'fd': open('/lib/libresolv.so.2',
- 'filename': 'libresolv.so'} }
-
-(send something like: 'Content-Disposition: form-data; name="form_name"; filename="libresolv.so";' )
-
-u = urllib2.urlopen('http://site.com/path/upload.php', data)
-
-Tested with:
- python 2.3
- python 2.4
- python 2.5
diff --git a/google/urllib2_file/__init__.py b/google/urllib2_file/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/google/urllib2_file/urllib2_file.py b/google/urllib2_file/urllib2_file.py
deleted file mode 100644
index 6af9f98..0000000
--- a/google/urllib2_file/urllib2_file.py
+++ /dev/null
@@ -1,308 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) 2004,2005,2006,2008,2009,2010 Fabien SEISEN
-#
-# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
-# --------------------------------------------
-#
-# 1. This LICENSE AGREEMENT is between the Python Software Foundation
-# ("PSF"), and the Individual or Organization ("Licensee") accessing and
-# otherwise using this software ("Python") in source or binary form and
-# its associated documentation.
-#
-# 2. Subject to the terms and conditions of this License Agreement, PSF
-# hereby grants Licensee a nonexclusive, royalty-free, world-wide
-# license to reproduce, analyze, test, perform and/or display publicly,
-# prepare derivative works, distribute, and otherwise use Python
-# alone or in any derivative version, provided, however, that PSF's
-# License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
-# 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation; All Rights
-# Reserved" are retained in Python alone or in any derivative version
-# prepared by Licensee.
-#
-# 3. In the event Licensee prepares a derivative work that is based on
-# or incorporates Python or any part thereof, and wants to make
-# the derivative work available to others as provided herein, then
-# Licensee hereby agrees to include in any such work a brief summary of
-# the changes made to Python.
-#
-# 4. PSF is making Python available to Licensee on an "AS IS"
-# basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
-# IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
-# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
-# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
-# INFRINGE ANY THIRD PARTY RIGHTS.
-#
-# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
-# FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
-# A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
-# OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
-#
-# 6. This License Agreement will automatically terminate upon a material
-# breach of its terms and conditions.
-#
-# 7. Nothing in this License Agreement shall be deemed to create any
-# relationship of agency, partnership, or joint venture between PSF and
-# Licensee. This License Agreement does not grant permission to use PSF
-# trademarks or trade name in a trademark sense to endorse or promote
-# products or services of Licensee, or any third party.
-#
-# 8. By copying, installing or otherwise using Python, Licensee
-# agrees to be bound by the terms and conditions of this License
-# Agreement.
-#
-"""
-extend urllib2 to enable uploading files using multipart/form-data
-
-I was looking for something to make me able to upload files to my photo web site (http://gallery.menalto.com/).
-Inspired by http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306
-
-Example:
-
-import urllib2_file
-import urllib2
-
-data = { 'foo': 'bar',
- 'form_name': open("/lib/libc.so.1")
- }
-(send something like: 'Content-Disposition: form-data; name="form_name"; filename="form_name";' )
-
-Or if you want to specify a different filename:
-data = { 'foo': 'bar',
- 'form_name': { 'fd': open('/lib/libresolv.so.2',
- 'filename': 'libresolv.so'}
- }
-(send something like: 'Content-Disposition: form-data; name="form_name"; filename="libresolv.so";' )
-
-u = urllib2.urlopen('http://site.com/path/upload.php', data)
-
-
-THANKS to:
-- bug fix: kosh @T aesaeion.com
-- HTTPS support : Ryan Grow
- - upload is now done with chunks (Adam Ambrose)
- - UTF-8 filenames are now allowed (Eli Golovinsky)
- - File object is no more mandatory, Object only needs to have seek() read() attributes (Eli Golovinsky)
- - StringIO workaround (Laurent Coustet), does not work with cStringIO
-
- Also modified by Adam Ambrose (aambrose @T pacbell.net) to write data in
-chunks (hardcoded to CHUNK_SIZE for now), so the entire contents of the file
-don't need to be kept in memory.
-
-"""
-
-__author__ = 'Fabien SEISEN'
-__license__ = 'Python Software Foundation License version 2'
-__url__ = 'http://fabien.seisen.org/python/'
-
-import httplib
-import mimetools
-import mimetypes
-import os
-import os.path
-import socket
-import stat
-import sys
-import urllib
-import urllib2
-
-CHUNK_SIZE = 65536
-
-def get_content_type(filename):
- return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
-
-# if sock is None, return the estimate size
-
-def send_data(v_vars, v_files, boundary, sock=None):
- """Parse v_vars, v_files and create a buffer with HTTP multipart/form-data
- if sock is set, send data to it
- v_vars = {"key": "value"}
- v_files = {"filename" : open("path/to/file"}
- """
-
- buffer_len = 0
- for (k, v) in v_vars:
- buffer=''
- buffer += '--%s\r\n' % boundary
- buffer += 'Content-Disposition: form-data; name="%s"\r\n' % k
- buffer += '\r\n'
- buffer += v + '\r\n'
- if sock:
- sock.send(buffer)
- buffer_len += len(buffer)
-
- for (k, v) in v_files:
- name = k
- filename = k
- if isinstance(v, dict):
- if v.has_key('fd'):
- fd = v['fd']
- else:
- raise TypeError("if value is dict, it must have keys 'fd' and 'filename'")
-
- if v.has_key('filename'):
- filename = v['filename']
- else:
- raise TypeError("if value is dict, it must have keys 'fd' and 'filename'")
- else:
- fd = v
-
- if not hasattr(fd, 'seek'):
- raise TypeError("file descriptor MUST have seek attribute")
-
- if not hasattr(fd, 'read'):
- raise TypeError("file descriptor MUST have read attribute")
-
- fd.seek(0)
- if hasattr(fd, 'fileno'):
- # a File
- file_size = os.fstat(fd.fileno())[stat.ST_SIZE]
- else:
- # Final resort, read the entire message, and figure out the size
- file_size = 0
- while True:
- chunk = fd.read(CHUNK_SIZE)
- if chunk:
- # It's not necessarily going to be CHUNK_SIZE large, since
- # the last chunk is very likely < CHUNK_SIZE
- file_size += len(chunk)
- else:
- break
- fd.seek(0)
-
- if isinstance(filename, unicode):
- filename = filename.encode('UTF-8')
- buffer = ''
- buffer += '--%s\r\n' % boundary
- buffer += 'Content-Disposition: form-data; name="%s"; filename="%s";\r\n' \
- % (name, filename)
- buffer += 'Content-Type: %s\r\n' % get_content_type(filename)
- buffer += 'Content-Length: %s\r\n' % file_size
- buffer += '\r\n'
-
- buffer_len += len(buffer)
- if sock:
- sock.send(buffer)
- if hasattr(fd, 'seek'):
- fd.seek(0)
- # read file only of sock is defined
- if sock:
- while True:
- chunk = fd.read(CHUNK_SIZE)
- if not chunk:
- break
- if sock:
- sock.send(chunk)
- buffer_len += file_size
- buffer = '\r\n'
- buffer += '--%s--\r\n' % boundary
- buffer += '\r\n'
- if sock:
- sock.send(buffer)
- buffer_len += len(buffer)
- return buffer_len
-
-# mainly a copy of HTTPHandler from urllib2
-class newHTTPHandler(urllib2.BaseHandler):
- def http_open(self, req):
- return self.do_open(httplib.HTTP, req)
-
- def do_open(self, http_class, req):
- data = req.get_data()
- v_files = []
- v_vars = []
- # mapping object (dict)
- if req.has_data() and type(data) != str:
- if hasattr(data, 'items'):
- data = data.items()
- else:
- try:
- if len(data) and not isinstance(data[0], tuple):
- raise TypeError
- except TypeError:
- ty, va, tb = sys.exc_info()
- raise TypeError, "not a valid non-string sequence or mapping object", tb
-
- for (k, v) in data:
- # if fd is provided with a filename
- if isinstance(v, dict):
- if not v.has_key('fd'):
- raise TypeError("if value is dict, it must have keys 'fd' and 'filename")
- if not v.has_key('filename'):
- raise TypeError("if value is dict, it must have keys 'fd' and 'filename")
- v_files.append( (k, v) )
- elif hasattr(v, 'read'):
- v_files.append( (k, v) )
- else:
- v_vars.append( (k, v) )
- # no file ? convert to string
- if len(v_vars) > 0 and len(v_files) == 0:
- data = urllib.urlencode(v_vars)
- v_files = []
- v_vars = []
- host = req.get_host()
- if not host:
- raise urllib2.URLError('no host given')
- h = http_class(host) # will parse host:port
- if req.has_data():
- h.putrequest(req.get_method(), req.get_selector())
- if not 'Content-type' in req.headers:
- if len(v_files) > 0:
- boundary = mimetools.choose_boundary()
- l = send_data(v_vars, v_files, boundary)
- h.putheader('Content-Type',
- 'multipart/form-data; boundary=%s' % boundary)
- h.putheader('Content-length', str(l))
- else:
- h.putheader('Content-type',
- 'application/x-www-form-urlencoded')
- if not 'Content-length' in req.headers:
- h.putheader('Content-length', '%d' % len(data))
- else:
- h.putrequest(req.get_method(), req.get_selector())
-
- scheme, sel = urllib.splittype(req.get_selector())
- sel_host, sel_path = urllib.splithost(sel)
- h.putheader('Host', sel_host or host)
- for name, value in self.parent.addheaders:
- name = name.capitalize()
- if name not in req.headers:
- h.putheader(name, value)
- for k, v in req.headers.items():
- h.putheader(k, v)
- # httplib will attempt to connect() here. be prepared
- # to convert a socket error to a URLError.
- try:
- h.endheaders()
- except socket.error, err:
- raise urllib2.URLError(err)
-
- if req.has_data():
- if len(v_files) > 0:
- l = send_data(v_vars, v_files, boundary, h)
- elif len(v_vars) > 0:
- # if data is passed as dict ...
- data = urllib.urlencode(v_vars)
- h.send(data)
- else:
- # "normal" urllib2.urlopen()
- h.send(data)
-
- code, msg, hdrs = h.getreply()
- fp = h.getfile()
- if code == 200:
- resp = urllib.addinfourl(fp, hdrs, req.get_full_url())
- resp.code = code
- resp.msg = msg
- return resp
- else:
- return self.parent.error('http', req, fp, code, msg, hdrs)
-
-urllib2._old_HTTPHandler = urllib2.HTTPHandler
-urllib2.HTTPHandler = newHTTPHandler
-
-class newHTTPSHandler(newHTTPHandler):
- def https_open(self, req):
- return self.do_open(httplib.HTTPS, req)
-
-urllib2.HTTPSHandler = newHTTPSHandler
-
diff --git a/refine.py b/refine.py
index ebb668f..d4a7f58 100755
--- a/refine.py
+++ b/refine.py
@@ -26,7 +26,7 @@ import time
from google.refine import refine
-from google.urllib2_file import urllib2_file
+import urllib2_file
import urllib2
import urlparse
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f02ab12
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+urllib2_file>=0.2.1
\ No newline at end of file