From f0643b46a0e47fe53e4b6c986fd781bd1eb795c1 Mon Sep 17 00:00:00 2001 From: Felix Lohmeier Date: Fri, 17 Nov 2017 16:47:31 +0100 Subject: [PATCH] Revert "included urllib2_file.py in the package to ease installation" This reverts commit bf91e918df59c0ce4b96f102db2639f0d7ed521b. --- README.rst | 10 +- google/refine/refine.py | 2 +- google/urllib2_file/LICENSE | 50 ----- google/urllib2_file/README | 32 --- google/urllib2_file/__init__.py | 0 google/urllib2_file/urllib2_file.py | 308 ---------------------------- refine.py | 2 +- requirements.txt | 1 + 8 files changed, 11 insertions(+), 394 deletions(-) delete mode 100644 google/urllib2_file/LICENSE delete mode 100644 google/urllib2_file/README delete mode 100644 google/urllib2_file/__init__.py delete mode 100644 google/urllib2_file/urllib2_file.py create mode 100644 requirements.txt diff --git a/README.rst b/README.rst index fb0d36f..bc17770 100644 --- a/README.rst +++ b/README.rst @@ -5,8 +5,8 @@ OpenRefine Python Client Library The OpenRefine Python Client Library provides an interface to communicating with an `OpenRefine `_ server. -If you are looking for a ready to use command line interface to OpenRefine for batch processing then you might be interested in the following bash shell script: -`felixlohmeier/openrefine-batch `_ +If you are looking for a ready to use command line interface to OpenRefine then you might be interested in the docker variation of this library: +`felixlohmeier/openrefine-client `_. You will find examples for batch processing (e.g. for usage in shell scripts) there. If you are familiar with python and want to go into more depth, then read on! @@ -71,6 +71,12 @@ Installation (Someone with more familiarity with python's byzantine collection of installation frameworks is very welcome to improve/"best practice" all this.) +#. Install dependencies, which currently is ``urllib2_file``: + + ``sudo pip install -r requirements.txt`` + + (If you don't have ``pip`` visit `pip-installer.org `_) + #. Ensure you have a Refine server running somewhere and, if necessary, set the environment vars as above. diff --git a/google/refine/refine.py b/google/refine/refine.py index 0d19160..c7c9b91 100644 --- a/google/refine/refine.py +++ b/google/refine/refine.py @@ -26,7 +26,7 @@ import re import StringIO import time import urllib -from google.urllib2_file import urllib2_file +import urllib2_file import urllib2 import urlparse diff --git a/google/urllib2_file/LICENSE b/google/urllib2_file/LICENSE deleted file mode 100644 index 797327d..0000000 --- a/google/urllib2_file/LICENSE +++ /dev/null @@ -1,50 +0,0 @@ -Copyright (C) 2004,2005,2006,2008,2009,2010 Fabien SEISEN - -PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 --------------------------------------------- - -1. This LICENSE AGREEMENT is between the Python Software Foundation -("PSF"), and the Individual or Organization ("Licensee") accessing and -otherwise using this software ("Python") in source or binary form and -its associated documentation. - -2. Subject to the terms and conditions of this License Agreement, PSF -hereby grants Licensee a nonexclusive, royalty-free, world-wide -license to reproduce, analyze, test, perform and/or display publicly, -prepare derivative works, distribute, and otherwise use Python -alone or in any derivative version, provided, however, that PSF's -License Agreement and PSF's notice of copyright, i.e., "Copyright (c) -2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation; All Rights -Reserved" are retained in Python alone or in any derivative version -prepared by Licensee. - -3. In the event Licensee prepares a derivative work that is based on -or incorporates Python or any part thereof, and wants to make -the derivative work available to others as provided herein, then -Licensee hereby agrees to include in any such work a brief summary of -the changes made to Python. - -4. PSF is making Python available to Licensee on an "AS IS" -basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND -DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT -INFRINGE ANY THIRD PARTY RIGHTS. - -5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON -FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS -A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, -OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. - -6. This License Agreement will automatically terminate upon a material -breach of its terms and conditions. - -7. Nothing in this License Agreement shall be deemed to create any -relationship of agency, partnership, or joint venture between PSF and -Licensee. This License Agreement does not grant permission to use PSF -trademarks or trade name in a trademark sense to endorse or promote -products or services of Licensee, or any third party. - -8. By copying, installing or otherwise using Python, Licensee -agrees to be bound by the terms and conditions of this License -Agreement. diff --git a/google/urllib2_file/README b/google/urllib2_file/README deleted file mode 100644 index 5d1b61d..0000000 --- a/google/urllib2_file/README +++ /dev/null @@ -1,32 +0,0 @@ -Python urllib2_file.py enable you to upload files using HTTP multipart/form-data - -Install: - -python setup.py test -python setup.py build -python setup.py install - -Example: - -import urllib2_files -import urllib2 - -data = { 'foo': 'bar', - 'form_name': open("/lib/libc.so.1") } - -(send something like: 'Content-Disposition: form-data; name="form_name"; filename="form_name";' ) - -Or if you want to specify a different filename: - -data = {'foo': 'bar', - 'form_name': {'fd': open('/lib/libresolv.so.2', - 'filename': 'libresolv.so'} } - -(send something like: 'Content-Disposition: form-data; name="form_name"; filename="libresolv.so";' ) - -u = urllib2.urlopen('http://site.com/path/upload.php', data) - -Tested with: - python 2.3 - python 2.4 - python 2.5 diff --git a/google/urllib2_file/__init__.py b/google/urllib2_file/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/google/urllib2_file/urllib2_file.py b/google/urllib2_file/urllib2_file.py deleted file mode 100644 index 6af9f98..0000000 --- a/google/urllib2_file/urllib2_file.py +++ /dev/null @@ -1,308 +0,0 @@ -#!/usr/bin/env python -# Copyright (C) 2004,2005,2006,2008,2009,2010 Fabien SEISEN -# -# PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 -# -------------------------------------------- -# -# 1. This LICENSE AGREEMENT is between the Python Software Foundation -# ("PSF"), and the Individual or Organization ("Licensee") accessing and -# otherwise using this software ("Python") in source or binary form and -# its associated documentation. -# -# 2. Subject to the terms and conditions of this License Agreement, PSF -# hereby grants Licensee a nonexclusive, royalty-free, world-wide -# license to reproduce, analyze, test, perform and/or display publicly, -# prepare derivative works, distribute, and otherwise use Python -# alone or in any derivative version, provided, however, that PSF's -# License Agreement and PSF's notice of copyright, i.e., "Copyright (c) -# 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation; All Rights -# Reserved" are retained in Python alone or in any derivative version -# prepared by Licensee. -# -# 3. In the event Licensee prepares a derivative work that is based on -# or incorporates Python or any part thereof, and wants to make -# the derivative work available to others as provided herein, then -# Licensee hereby agrees to include in any such work a brief summary of -# the changes made to Python. -# -# 4. PSF is making Python available to Licensee on an "AS IS" -# basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -# IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND -# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT -# INFRINGE ANY THIRD PARTY RIGHTS. -# -# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON -# FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS -# A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, -# OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. -# -# 6. This License Agreement will automatically terminate upon a material -# breach of its terms and conditions. -# -# 7. Nothing in this License Agreement shall be deemed to create any -# relationship of agency, partnership, or joint venture between PSF and -# Licensee. This License Agreement does not grant permission to use PSF -# trademarks or trade name in a trademark sense to endorse or promote -# products or services of Licensee, or any third party. -# -# 8. By copying, installing or otherwise using Python, Licensee -# agrees to be bound by the terms and conditions of this License -# Agreement. -# -""" -extend urllib2 to enable uploading files using multipart/form-data - -I was looking for something to make me able to upload files to my photo web site (http://gallery.menalto.com/). -Inspired by http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306 - -Example: - -import urllib2_file -import urllib2 - -data = { 'foo': 'bar', - 'form_name': open("/lib/libc.so.1") - } -(send something like: 'Content-Disposition: form-data; name="form_name"; filename="form_name";' ) - -Or if you want to specify a different filename: -data = { 'foo': 'bar', - 'form_name': { 'fd': open('/lib/libresolv.so.2', - 'filename': 'libresolv.so'} - } -(send something like: 'Content-Disposition: form-data; name="form_name"; filename="libresolv.so";' ) - -u = urllib2.urlopen('http://site.com/path/upload.php', data) - - -THANKS to: -- bug fix: kosh @T aesaeion.com -- HTTPS support : Ryan Grow - - upload is now done with chunks (Adam Ambrose) - - UTF-8 filenames are now allowed (Eli Golovinsky) - - File object is no more mandatory, Object only needs to have seek() read() attributes (Eli Golovinsky) - - StringIO workaround (Laurent Coustet), does not work with cStringIO - - Also modified by Adam Ambrose (aambrose @T pacbell.net) to write data in -chunks (hardcoded to CHUNK_SIZE for now), so the entire contents of the file -don't need to be kept in memory. - -""" - -__author__ = 'Fabien SEISEN' -__license__ = 'Python Software Foundation License version 2' -__url__ = 'http://fabien.seisen.org/python/' - -import httplib -import mimetools -import mimetypes -import os -import os.path -import socket -import stat -import sys -import urllib -import urllib2 - -CHUNK_SIZE = 65536 - -def get_content_type(filename): - return mimetypes.guess_type(filename)[0] or 'application/octet-stream' - -# if sock is None, return the estimate size - -def send_data(v_vars, v_files, boundary, sock=None): - """Parse v_vars, v_files and create a buffer with HTTP multipart/form-data - if sock is set, send data to it - v_vars = {"key": "value"} - v_files = {"filename" : open("path/to/file"} - """ - - buffer_len = 0 - for (k, v) in v_vars: - buffer='' - buffer += '--%s\r\n' % boundary - buffer += 'Content-Disposition: form-data; name="%s"\r\n' % k - buffer += '\r\n' - buffer += v + '\r\n' - if sock: - sock.send(buffer) - buffer_len += len(buffer) - - for (k, v) in v_files: - name = k - filename = k - if isinstance(v, dict): - if v.has_key('fd'): - fd = v['fd'] - else: - raise TypeError("if value is dict, it must have keys 'fd' and 'filename'") - - if v.has_key('filename'): - filename = v['filename'] - else: - raise TypeError("if value is dict, it must have keys 'fd' and 'filename'") - else: - fd = v - - if not hasattr(fd, 'seek'): - raise TypeError("file descriptor MUST have seek attribute") - - if not hasattr(fd, 'read'): - raise TypeError("file descriptor MUST have read attribute") - - fd.seek(0) - if hasattr(fd, 'fileno'): - # a File - file_size = os.fstat(fd.fileno())[stat.ST_SIZE] - else: - # Final resort, read the entire message, and figure out the size - file_size = 0 - while True: - chunk = fd.read(CHUNK_SIZE) - if chunk: - # It's not necessarily going to be CHUNK_SIZE large, since - # the last chunk is very likely < CHUNK_SIZE - file_size += len(chunk) - else: - break - fd.seek(0) - - if isinstance(filename, unicode): - filename = filename.encode('UTF-8') - buffer = '' - buffer += '--%s\r\n' % boundary - buffer += 'Content-Disposition: form-data; name="%s"; filename="%s";\r\n' \ - % (name, filename) - buffer += 'Content-Type: %s\r\n' % get_content_type(filename) - buffer += 'Content-Length: %s\r\n' % file_size - buffer += '\r\n' - - buffer_len += len(buffer) - if sock: - sock.send(buffer) - if hasattr(fd, 'seek'): - fd.seek(0) - # read file only of sock is defined - if sock: - while True: - chunk = fd.read(CHUNK_SIZE) - if not chunk: - break - if sock: - sock.send(chunk) - buffer_len += file_size - buffer = '\r\n' - buffer += '--%s--\r\n' % boundary - buffer += '\r\n' - if sock: - sock.send(buffer) - buffer_len += len(buffer) - return buffer_len - -# mainly a copy of HTTPHandler from urllib2 -class newHTTPHandler(urllib2.BaseHandler): - def http_open(self, req): - return self.do_open(httplib.HTTP, req) - - def do_open(self, http_class, req): - data = req.get_data() - v_files = [] - v_vars = [] - # mapping object (dict) - if req.has_data() and type(data) != str: - if hasattr(data, 'items'): - data = data.items() - else: - try: - if len(data) and not isinstance(data[0], tuple): - raise TypeError - except TypeError: - ty, va, tb = sys.exc_info() - raise TypeError, "not a valid non-string sequence or mapping object", tb - - for (k, v) in data: - # if fd is provided with a filename - if isinstance(v, dict): - if not v.has_key('fd'): - raise TypeError("if value is dict, it must have keys 'fd' and 'filename") - if not v.has_key('filename'): - raise TypeError("if value is dict, it must have keys 'fd' and 'filename") - v_files.append( (k, v) ) - elif hasattr(v, 'read'): - v_files.append( (k, v) ) - else: - v_vars.append( (k, v) ) - # no file ? convert to string - if len(v_vars) > 0 and len(v_files) == 0: - data = urllib.urlencode(v_vars) - v_files = [] - v_vars = [] - host = req.get_host() - if not host: - raise urllib2.URLError('no host given') - h = http_class(host) # will parse host:port - if req.has_data(): - h.putrequest(req.get_method(), req.get_selector()) - if not 'Content-type' in req.headers: - if len(v_files) > 0: - boundary = mimetools.choose_boundary() - l = send_data(v_vars, v_files, boundary) - h.putheader('Content-Type', - 'multipart/form-data; boundary=%s' % boundary) - h.putheader('Content-length', str(l)) - else: - h.putheader('Content-type', - 'application/x-www-form-urlencoded') - if not 'Content-length' in req.headers: - h.putheader('Content-length', '%d' % len(data)) - else: - h.putrequest(req.get_method(), req.get_selector()) - - scheme, sel = urllib.splittype(req.get_selector()) - sel_host, sel_path = urllib.splithost(sel) - h.putheader('Host', sel_host or host) - for name, value in self.parent.addheaders: - name = name.capitalize() - if name not in req.headers: - h.putheader(name, value) - for k, v in req.headers.items(): - h.putheader(k, v) - # httplib will attempt to connect() here. be prepared - # to convert a socket error to a URLError. - try: - h.endheaders() - except socket.error, err: - raise urllib2.URLError(err) - - if req.has_data(): - if len(v_files) > 0: - l = send_data(v_vars, v_files, boundary, h) - elif len(v_vars) > 0: - # if data is passed as dict ... - data = urllib.urlencode(v_vars) - h.send(data) - else: - # "normal" urllib2.urlopen() - h.send(data) - - code, msg, hdrs = h.getreply() - fp = h.getfile() - if code == 200: - resp = urllib.addinfourl(fp, hdrs, req.get_full_url()) - resp.code = code - resp.msg = msg - return resp - else: - return self.parent.error('http', req, fp, code, msg, hdrs) - -urllib2._old_HTTPHandler = urllib2.HTTPHandler -urllib2.HTTPHandler = newHTTPHandler - -class newHTTPSHandler(newHTTPHandler): - def https_open(self, req): - return self.do_open(httplib.HTTPS, req) - -urllib2.HTTPSHandler = newHTTPSHandler - diff --git a/refine.py b/refine.py index ebb668f..d4a7f58 100755 --- a/refine.py +++ b/refine.py @@ -26,7 +26,7 @@ import time from google.refine import refine -from google.urllib2_file import urllib2_file +import urllib2_file import urllib2 import urlparse diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f02ab12 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +urllib2_file>=0.2.1 \ No newline at end of file