org.apache.nutch.protocol.http.api.HttpException Java Examples
The following examples show how to use
org.apache.nutch.protocol.http.api.HttpException.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HttpResponse.java From anthelion with Apache License 2.0 | 6 votes |
private int parseStatusLine(PushbackInputStream in, StringBuffer line) throws IOException, HttpException { readLine(in, line, false); int codeStart = line.indexOf(" "); int codeEnd = line.indexOf(" ", codeStart+1); // handle lines with no plaintext result code, ie: // "HTTP/1.1 200" vs "HTTP/1.1 200 OK" if (codeEnd == -1) codeEnd= line.length(); int code; try { code= Integer.parseInt(line.substring(codeStart+1, codeEnd)); } catch (NumberFormatException e) { throw new HttpException("bad status line '" + line + "': " + e.getMessage(), e); } return code; }
Example #2
Source File: HttpResponse.java From anthelion with Apache License 2.0 | 6 votes |
private void processHeaderLine(StringBuffer line) throws IOException, HttpException { int colonIndex = line.indexOf(":"); // key is up to colon if (colonIndex == -1) { int i; for (i= 0; i < line.length(); i++) if (!Character.isWhitespace(line.charAt(i))) break; if (i == line.length()) return; throw new HttpException("No colon in header:" + line); } String key = line.substring(0, colonIndex); int valueStart = colonIndex+1; // skip whitespace while (valueStart < line.length()) { int c = line.charAt(valueStart); if (c != ' ' && c != '\t') break; valueStart++; } String value = line.substring(valueStart); headers.set(key, value); }
Example #3
Source File: HttpResponse.java From nutch-selenium with Apache License 2.0 | 6 votes |
private int parseStatusLine(PushbackInputStream in, StringBuffer line) throws IOException, HttpException { readLine(in, line, false); int codeStart = line.indexOf(" "); int codeEnd = line.indexOf(" ", codeStart + 1); // handle lines with no plaintext result code, ie: // "HTTP/1.1 200" vs "HTTP/1.1 200 OK" if (codeEnd == -1) codeEnd = line.length(); int code; try { code = Integer.parseInt(line.substring(codeStart + 1, codeEnd)); } catch (NumberFormatException e) { throw new HttpException("bad status line '" + line + "': " + e.getMessage(), e); } return code; }
Example #4
Source File: HttpResponse.java From nutch-selenium with Apache License 2.0 | 6 votes |
private void processHeaderLine(StringBuffer line) throws IOException, HttpException { int colonIndex = line.indexOf(":"); // key is up to colon if (colonIndex == -1) { int i; for (i = 0; i < line.length(); i++) if (!Character.isWhitespace(line.charAt(i))) break; if (i == line.length()) return; throw new HttpException("No colon in header:" + line); } String key = line.substring(0, colonIndex); int valueStart = colonIndex + 1; // skip whitespace while (valueStart < line.length()) { int c = line.charAt(valueStart); if (c != ' ' && c != '\t') break; valueStart++; } String value = line.substring(valueStart); headers.set(key, value); }
Example #5
Source File: HttpResponse.java From nutch-htmlunit with Apache License 2.0 | 6 votes |
private int parseStatusLine(PushbackInputStream in, StringBuffer line) throws IOException, HttpException { readLine(in, line, false); int codeStart = line.indexOf(" "); int codeEnd = line.indexOf(" ", codeStart+1); // handle lines with no plaintext result code, ie: // "HTTP/1.1 200" vs "HTTP/1.1 200 OK" if (codeEnd == -1) codeEnd= line.length(); int code; try { code= Integer.parseInt(line.substring(codeStart+1, codeEnd)); } catch (NumberFormatException e) { throw new HttpException("bad status line '" + line + "': " + e.getMessage(), e); } return code; }
Example #6
Source File: HttpResponse.java From nutch-htmlunit with Apache License 2.0 | 6 votes |
private void processHeaderLine(StringBuffer line) throws IOException, HttpException { int colonIndex = line.indexOf(":"); // key is up to colon if (colonIndex == -1) { int i; for (i= 0; i < line.length(); i++) if (!Character.isWhitespace(line.charAt(i))) break; if (i == line.length()) return; throw new HttpException("No colon in header:" + line); } String key = line.substring(0, colonIndex); int valueStart = colonIndex+1; // skip whitespace while (valueStart < line.length()) { int c = line.charAt(valueStart); if (c != ' ' && c != '\t') break; valueStart++; } String value = line.substring(valueStart); headers.set(key, value); }
Example #7
Source File: HttpResponse.java From nutch-htmlunit with Apache License 2.0 | 6 votes |
private void readPlainContent(InputStream in) throws HttpException, IOException { int contentLength = Integer.MAX_VALUE; // get content length String contentLengthString = headers.get(Response.CONTENT_LENGTH); if (contentLengthString != null) { contentLengthString = contentLengthString.trim(); try { if (!contentLengthString.isEmpty()) contentLength = Integer.parseInt(contentLengthString); } catch (NumberFormatException e) { throw new HttpException("bad content length: " + contentLengthString); } } if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) // limit download size contentLength = http.getMaxContent(); ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE); byte[] bytes = new byte[Http.BUFFER_SIZE]; int length = 0; // read content for (int i = in.read(bytes); i != -1 && length + i <= contentLength; i = in.read(bytes)) { out.write(bytes, 0, i); length += i; } content = out.toByteArray(); }
Example #8
Source File: HttpResponse.java From nutch-htmlunit with Apache License 2.0 | 6 votes |
private void processHeaderLine(StringBuffer line) throws IOException, HttpException { int colonIndex = line.indexOf(":"); // key is up to colon if (colonIndex == -1) { int i; for (i = 0; i < line.length(); i++) if (!Character.isWhitespace(line.charAt(i))) break; if (i == line.length()) return; throw new HttpException("No colon in header:" + line); } String key = line.substring(0, colonIndex); int valueStart = colonIndex + 1; // skip whitespace while (valueStart < line.length()) { int c = line.charAt(valueStart); if (c != ' ' && c != '\t') break; valueStart++; } String value = line.substring(valueStart); headers.set(key, value); }
Example #9
Source File: HttpResponse.java From nutch-htmlunit with Apache License 2.0 | 6 votes |
private int parseStatusLine(PushbackInputStream in, StringBuffer line) throws IOException, HttpException { readLine(in, line, false); int codeStart = line.indexOf(" "); int codeEnd = line.indexOf(" ", codeStart + 1); // handle lines with no plaintext result code, ie: // "HTTP/1.1 200" vs "HTTP/1.1 200 OK" if (codeEnd == -1) codeEnd = line.length(); int code; try { code = Integer.parseInt(line.substring(codeStart + 1, codeEnd)); } catch (NumberFormatException e) { throw new HttpException("bad status line '" + line + "': " + e.getMessage(), e); } return code; }
Example #10
Source File: HttpResponse.java From nutch-htmlunit with Apache License 2.0 | 5 votes |
private void parseHeaders(PushbackInputStream in, StringBuffer line) throws IOException, HttpException { while (readLine(in, line, true) != 0) { // handle HTTP responses with missing blank line after headers int pos; if (((pos = line.indexOf("<!DOCTYPE")) != -1) || ((pos = line.indexOf("<HTML")) != -1) || ((pos = line.indexOf("<html")) != -1)) { in.unread(line.substring(pos).getBytes("UTF-8")); line.setLength(pos); try { //TODO: (CM) We don't know the header names here //since we're just handling them generically. It would //be nice to provide some sort of mapping function here //for the returned header names to the standard metadata //names in the ParseData class processHeaderLine(line); } catch (Exception e) { // fixme: Http.LOG.warn("Error: ", e); } return; } processHeaderLine(line); } }
Example #11
Source File: HttpResponse.java From anthelion with Apache License 2.0 | 5 votes |
private void parseHeaders(PushbackInputStream in, StringBuffer line) throws IOException, HttpException { while (readLine(in, line, true) != 0) { // handle HTTP responses with missing blank line after headers int pos; if ( ((pos= line.indexOf("<!DOCTYPE")) != -1) || ((pos= line.indexOf("<HTML")) != -1) || ((pos= line.indexOf("<html")) != -1) ) { in.unread(line.substring(pos).getBytes("UTF-8")); line.setLength(pos); try { //TODO: (CM) We don't know the header names here //since we're just handling them generically. It would //be nice to provide some sort of mapping function here //for the returned header names to the standard metadata //names in the ParseData class processHeaderLine(line); } catch (Exception e) { // fixme: Http.LOG.warn("Error: ", e); } return; } processHeaderLine(line); } }
Example #12
Source File: HttpResponse.java From nutch-selenium with Apache License 2.0 | 5 votes |
private void parseHeaders(PushbackInputStream in, StringBuffer line) throws IOException, HttpException { while (readLine(in, line, true) != 0) { // handle HTTP responses with missing blank line after headers int pos; if (((pos = line.indexOf("<!DOCTYPE")) != -1) || ((pos = line.indexOf("<HTML")) != -1) || ((pos = line.indexOf("<html")) != -1)) { in.unread(line.substring(pos).getBytes("UTF-8")); line.setLength(pos); try { //TODO: (CM) We don't know the header names here //since we're just handling them generically. It would //be nice to provide some sort of mapping function here //for the returned header names to the standard metadata //names in the ParseData class processHeaderLine(line); } catch (Exception e) { // fixme: Http.LOG.warn("Error: ", e); } return; } processHeaderLine(line); } }
Example #13
Source File: HttpResponse.java From nutch-htmlunit with Apache License 2.0 | 5 votes |
private void readPlainContent(InputStream in) throws HttpException, IOException { int contentLength = Integer.MAX_VALUE; // get content length String contentLengthString = headers.get(Response.CONTENT_LENGTH); if (contentLengthString != null) { contentLengthString = contentLengthString.trim(); try { if (!contentLengthString.isEmpty()) contentLength = Integer.parseInt(contentLengthString); } catch (NumberFormatException e) { throw new HttpException("bad content length: "+contentLengthString); } } if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) // limit download size contentLength = http.getMaxContent(); ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE); byte[] bytes = new byte[Http.BUFFER_SIZE]; int length = 0; // read content for (int i = in.read(bytes); i != -1 && length + i <= contentLength; i = in.read(bytes)) { out.write(bytes, 0, i); length += i; } content = out.toByteArray(); }
Example #14
Source File: HttpResponse.java From anthelion with Apache License 2.0 | 5 votes |
private void readPlainContent(InputStream in) throws HttpException, IOException { int contentLength = Integer.MAX_VALUE; // get content length String contentLengthString = headers.get(Response.CONTENT_LENGTH); if (contentLengthString != null) { contentLengthString = contentLengthString.trim(); try { if (!contentLengthString.isEmpty()) contentLength = Integer.parseInt(contentLengthString); } catch (NumberFormatException e) { throw new HttpException("bad content length: "+contentLengthString); } } if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) // limit download size contentLength = http.getMaxContent(); ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE); byte[] bytes = new byte[Http.BUFFER_SIZE]; int length = 0; // read content for (int i = in.read(bytes); i != -1 && length + i <= contentLength; i = in.read(bytes)) { out.write(bytes, 0, i); length += i; } content = out.toByteArray(); }
Example #15
Source File: HttpResponse.java From nutch-htmlunit with Apache License 2.0 | 5 votes |
private void parseHeaders(PushbackInputStream in, StringBuffer line) throws IOException, HttpException { while (readLine(in, line, true) != 0) { // handle HTTP responses with missing blank line after headers int pos; if ( ((pos= line.indexOf("<!DOCTYPE")) != -1) || ((pos= line.indexOf("<HTML")) != -1) || ((pos= line.indexOf("<html")) != -1) ) { in.unread(line.substring(pos).getBytes("UTF-8")); line.setLength(pos); try { //TODO: (CM) We don't know the header names here //since we're just handling them generically. It would //be nice to provide some sort of mapping function here //for the returned header names to the standard metadata //names in the ParseData class processHeaderLine(line); } catch (Exception e) { // fixme: Http.LOG.warn("Error: ", e); } return; } processHeaderLine(line); } }
Example #16
Source File: HttpResponse.java From nutch-htmlunit with Apache License 2.0 | 4 votes |
/** * * @param in * @param line * @throws HttpException * @throws IOException */ @SuppressWarnings("unused") private void readChunkedContent(PushbackInputStream in, StringBuffer line) throws HttpException, IOException { boolean doneChunks= false; int contentBytesRead= 0; byte[] bytes = new byte[Http.BUFFER_SIZE]; ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE); while (!doneChunks) { if (Http.LOG.isTraceEnabled()) { Http.LOG.trace("Http: starting chunk"); } readLine(in, line, false); String chunkLenStr; // if (LOG.isTraceEnabled()) { LOG.trace("chunk-header: '" + line + "'"); } int pos= line.indexOf(";"); if (pos < 0) { chunkLenStr= line.toString(); } else { chunkLenStr= line.substring(0, pos); // if (LOG.isTraceEnabled()) { LOG.trace("got chunk-ext: " + line.substring(pos+1)); } } chunkLenStr= chunkLenStr.trim(); int chunkLen; try { chunkLen= Integer.parseInt(chunkLenStr, 16); } catch (NumberFormatException e){ throw new HttpException("bad chunk length: "+line.toString()); } if (chunkLen == 0) { doneChunks= true; break; } if ( (contentBytesRead + chunkLen) > http.getMaxContent() ) chunkLen= http.getMaxContent() - contentBytesRead; // read one chunk int chunkBytesRead= 0; while (chunkBytesRead < chunkLen) { int toRead= (chunkLen - chunkBytesRead) < Http.BUFFER_SIZE ? (chunkLen - chunkBytesRead) : Http.BUFFER_SIZE; int len= in.read(bytes, 0, toRead); if (len == -1) throw new HttpException("chunk eof after " + contentBytesRead + " bytes in successful chunks" + " and " + chunkBytesRead + " in current chunk"); // DANGER!!! Will printed GZIPed stuff right to your // terminal! // if (LOG.isTraceEnabled()) { LOG.trace("read: " + new String(bytes, 0, len)); } out.write(bytes, 0, len); chunkBytesRead+= len; } readLine(in, line, false); } if (!doneChunks) { if (contentBytesRead != http.getMaxContent()) throw new HttpException("chunk eof: !doneChunk && didn't max out"); return; } content = out.toByteArray(); parseHeaders(in, line); }
Example #17
Source File: HttpResponse.java From nutch-htmlunit with Apache License 2.0 | 4 votes |
/** * * @param in * @param line * @throws HttpException * @throws IOException */ @SuppressWarnings("unused") private void readChunkedContent(PushbackInputStream in, StringBuffer line) throws HttpException, IOException { boolean doneChunks = false; int contentBytesRead = 0; byte[] bytes = new byte[Http.BUFFER_SIZE]; ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE); while (!doneChunks) { if (Http.LOG.isTraceEnabled()) { Http.LOG.trace("Http: starting chunk"); } readLine(in, line, false); String chunkLenStr; // if (LOG.isTraceEnabled()) { LOG.trace("chunk-header: '" + line + "'"); } int pos = line.indexOf(";"); if (pos < 0) { chunkLenStr = line.toString(); } else { chunkLenStr = line.substring(0, pos); // if (LOG.isTraceEnabled()) { LOG.trace("got chunk-ext: " + line.substring(pos+1)); } } chunkLenStr = chunkLenStr.trim(); int chunkLen; try { chunkLen = Integer.parseInt(chunkLenStr, 16); } catch (NumberFormatException e) { throw new HttpException("bad chunk length: " + line.toString()); } if (chunkLen == 0) { doneChunks = true; break; } if ((contentBytesRead + chunkLen) > http.getMaxContent()) chunkLen = http.getMaxContent() - contentBytesRead; // read one chunk int chunkBytesRead = 0; while (chunkBytesRead < chunkLen) { int toRead = (chunkLen - chunkBytesRead) < Http.BUFFER_SIZE ? (chunkLen - chunkBytesRead) : Http.BUFFER_SIZE; int len = in.read(bytes, 0, toRead); if (len == -1) throw new HttpException("chunk eof after " + contentBytesRead + " bytes in successful chunks" + " and " + chunkBytesRead + " in current chunk"); // DANGER!!! Will printed GZIPed stuff right to your // terminal! // if (LOG.isTraceEnabled()) { LOG.trace("read: " + new String(bytes, 0, len)); } out.write(bytes, 0, len); chunkBytesRead += len; } readLine(in, line, false); } if (!doneChunks) { if (contentBytesRead != http.getMaxContent()) throw new HttpException("chunk eof: !doneChunk && didn't max out"); return; } content = out.toByteArray(); parseHeaders(in, line); }
Example #18
Source File: HttpResponse.java From anthelion with Apache License 2.0 | 4 votes |
private void readChunkedContent(PushbackInputStream in, StringBuffer line) throws HttpException, IOException { boolean doneChunks= false; int contentBytesRead= 0; byte[] bytes = new byte[Http.BUFFER_SIZE]; ByteArrayOutputStream out = new ByteArrayOutputStream(Http.BUFFER_SIZE); while (!doneChunks) { if (Http.LOG.isTraceEnabled()) { Http.LOG.trace("Http: starting chunk"); } readLine(in, line, false); String chunkLenStr; // if (LOG.isTraceEnabled()) { LOG.trace("chunk-header: '" + line + "'"); } int pos= line.indexOf(";"); if (pos < 0) { chunkLenStr= line.toString(); } else { chunkLenStr= line.substring(0, pos); // if (LOG.isTraceEnabled()) { LOG.trace("got chunk-ext: " + line.substring(pos+1)); } } chunkLenStr= chunkLenStr.trim(); int chunkLen; try { chunkLen= Integer.parseInt(chunkLenStr, 16); } catch (NumberFormatException e){ throw new HttpException("bad chunk length: "+line.toString()); } if (chunkLen == 0) { doneChunks= true; break; } if ( (contentBytesRead + chunkLen) > http.getMaxContent() ) chunkLen= http.getMaxContent() - contentBytesRead; // read one chunk int chunkBytesRead= 0; while (chunkBytesRead < chunkLen) { int toRead= (chunkLen - chunkBytesRead) < Http.BUFFER_SIZE ? (chunkLen - chunkBytesRead) : Http.BUFFER_SIZE; int len= in.read(bytes, 0, toRead); if (len == -1) throw new HttpException("chunk eof after " + contentBytesRead + " bytes in successful chunks" + " and " + chunkBytesRead + " in current chunk"); // DANGER!!! Will printed GZIPed stuff right to your // terminal! // if (LOG.isTraceEnabled()) { LOG.trace("read: " + new String(bytes, 0, len)); } out.write(bytes, 0, len); chunkBytesRead+= len; } readLine(in, line, false); } if (!doneChunks) { if (contentBytesRead != http.getMaxContent()) throw new HttpException("chunk eof: !doneChunk && didn't max out"); return; } content = out.toByteArray(); parseHeaders(in, line); }