|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
org.pandora.sense.BasicSense
org.pandora.sense.http.Http
public class Http
Processes HTTP requests, GETs and PUTs.
Field Summary | |
---|---|
static int |
WORKER_THREADS
|
Fields inherited from class org.pandora.sense.BasicSense |
---|
DEFAULT_SPEAKER, RETRY |
Constructor Summary | |
---|---|
Http()
|
Method Summary | |
---|---|
void |
batchProcessURL(java.net.URL url,
Network network)
Process the URL as part of a batch. |
java.io.StringReader |
convertToXHTML(java.io.InputStream input)
Convert the HTML input stream into DOM parsable XHTML. |
org.w3c.dom.Node |
findNextTag(java.util.Set<java.lang.String> tags,
java.lang.String value,
org.w3c.dom.Node node,
org.w3c.dom.Node root)
Find the next node for the tag, searching children, siblings and cousins. |
org.w3c.dom.Node |
findTag(java.util.Set<java.lang.String> tags,
java.lang.String value,
org.w3c.dom.Node node)
Find the next node for any of the tags whose text contains the value. |
org.w3c.dom.Node |
findTag(java.lang.String tag,
org.w3c.dom.Node node)
Find the next node for the tag. |
org.w3c.dom.Node |
findTag(java.lang.String tag,
java.lang.String value,
org.w3c.dom.Node node)
Find the next node for the tag. |
java.util.List<java.lang.String> |
getAllBullets(org.w3c.dom.Node node)
Parse the text values from the next bullet list. |
java.util.List<java.lang.String> |
getAllURLBullets(org.w3c.dom.Node node)
Parse the text values from the next bullet list. |
java.util.Map<java.lang.String,Http> |
getDomains()
Return the map of registered domain-processing senses, keyed by the URL domains they accept. |
org.htmlcleaner.HtmlCleaner |
getHtmlCleaner()
|
java.util.List<java.lang.String> |
getNextBulletList(org.w3c.dom.Node node)
Parse the text values from the next bullet list. |
javax.xml.parsers.DocumentBuilder |
getParser()
|
Vertex |
getSentence(org.w3c.dom.Node node,
Network network)
Return a sentence of all the words, or a word if there is only a single word. |
java.lang.String |
getText(org.w3c.dom.Node node)
Return the complete node text. |
void |
input(java.util.Collection<java.net.URL> input)
Process the list of URLs as a batch using multi threading. |
void |
input(java.lang.Object input,
Network network)
Get and process the URL. |
org.w3c.dom.Node |
nextNode(org.w3c.dom.Node node,
org.w3c.dom.Node root)
Return the next sibling or parent sibling node. |
void |
output(Vertex output)
Post: process the POST request. |
org.w3c.dom.Element |
parseURL(java.net.URL url)
Parse the HTML as a DOM. |
org.w3c.dom.Element |
parseXHTML(java.io.StringReader input)
Parse the input XHTML stream into a DOM. |
void |
processHeaders(org.w3c.dom.Node node,
Vertex url,
Network network)
Process the header nodes and associate their topics with the URL. |
void |
processRoot(org.w3c.dom.Node node,
java.net.URL url,
Network network)
Process the XHTML DOM. |
Methods inherited from class org.pandora.sense.BasicSense |
---|
awake, getEmotionalState, getLanguageState, getName, getPandora, getPrimitive, getShortTermMemory, initialize, input, isEnabled, log, log, log, setEmotionalState, setIsEnabled, setLanguageState, setName, setPandora, shutdown, toString |
Methods inherited from class java.lang.Object |
---|
equals, getClass, hashCode, notify, notifyAll, wait, wait, wait |
Field Detail |
---|
public static int WORKER_THREADS
Constructor Detail |
---|
public Http()
Method Detail |
---|
public void batchProcessURL(java.net.URL url, Network network)
public java.io.StringReader convertToXHTML(java.io.InputStream input) throws java.io.IOException
Throws: java.io.IOException
public javax.xml.parsers.DocumentBuilder getParser() throws java.lang.Exception
Throws: java.lang.Exception
public org.htmlcleaner.HtmlCleaner getHtmlCleaner()
public org.w3c.dom.Element parseXHTML(java.io.StringReader input) throws java.lang.Exception
Throws: java.lang.Exception
public void input(java.lang.Object input, Network network) throws java.lang.Exception
Specified by: input in interface Sense
Overrides: input in class BasicSense
Throws: java.lang.Exception
public org.w3c.dom.Element parseURL(java.net.URL url)
public void input(java.util.Collection<java.net.URL> input)
public void processRoot(org.w3c.dom.Node node, java.net.URL url, Network network)
public void processHeaders(org.w3c.dom.Node node, Vertex url, Network network)
public org.w3c.dom.Node nextNode(org.w3c.dom.Node node, org.w3c.dom.Node root)
public org.w3c.dom.Node findNextTag(java.util.Set<java.lang.String> tags, java.lang.String value, org.w3c.dom.Node node, org.w3c.dom.Node root)
public org.w3c.dom.Node findTag(java.lang.String tag, org.w3c.dom.Node node)
public org.w3c.dom.Node findTag(java.lang.String tag, java.lang.String value, org.w3c.dom.Node node)
public org.w3c.dom.Node findTag(java.util.Set<java.lang.String> tags, java.lang.String value, org.w3c.dom.Node node)
public java.lang.String getText(org.w3c.dom.Node node)
public Vertex getSentence(org.w3c.dom.Node node, Network network)
public java.util.List<java.lang.String> getNextBulletList(org.w3c.dom.Node node)
public java.util.List<java.lang.String> getAllBullets(org.w3c.dom.Node node)
public java.util.List<java.lang.String> getAllURLBullets(org.w3c.dom.Node node)
public void output(Vertex output)
Specified by: output in interface Sense
Overrides: output in class BasicSense
public java.util.Map<java.lang.String,Http> getDomains()
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |