public class OOSpider<T> extends Spider
@TargetUrl("http://my.oschina.net/flashsword/blog/\\d+")
public class OschinaBlog{
@ExtractBy("//title")
private String title;
@ExtractBy(value = "div.BlogContent",type = ExtractBy.Type.Css)
private String content;
@ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true)
private List<String> tags;
}
And start the spider by:
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog"),
        new JsonFilePageModelPipeline(), OschinaBlog.class).run();
Nested classes inherited from class Spider: Spider.Status
Fields inherited from class Spider: destroyWhenExit, downloader, executorService, exitWhenComplete, logger, pageProcessor, pipelines, scheduler, site, spawnUrl, startRequests, stat, STAT_INIT, STAT_RUNNING, STAT_STOPPED, threadNum, threadPool, uuid

| Modifier | Constructor and Description |
|---|---|
| protected | OOSpider(us.codecraft.webmagic.model.ModelPageProcessor modelPageProcessor) |
| | OOSpider(PageProcessor pageProcessor) |
| | OOSpider(Site site, PageModelPipeline pageModelPipeline, Class... pageModels) — create a spider |

| Modifier and Type | Method and Description |
|---|---|
| OOSpider | addPageModel(PageModelPipeline pageModelPipeline, Class... pageModels) |
| static OOSpider | create(Site site, Class... pageModels) |
| static OOSpider | create(Site site, PageModelPipeline pageModelPipeline, Class... pageModels) |
| protected CollectorPipeline | getCollectorPipeline() |
| OOSpider | setIsExtractLinks(boolean isExtractLinks) |

Methods inherited from class Spider: addPipeline, addRequest, addUrl, checkIfRunning, clearPipeline, close, create, downloader, extractAndAddRequests, get, getAll, getPageCount, getScheduler, getSite, getSpiderListeners, getStartTime, getStatus, getThreadAlive, getUUID, initComponent, isExitWhenComplete, isSpawnUrl, onError, onSuccess, pipeline, run, runAsync, scheduler, setDownloader, setEmptySleepTime, setExecutorService, setExitWhenComplete, setPipelines, setScheduler, setSpawnUrl, setSpiderListeners, setUUID, sleep, start, startRequest, startUrls, stop, test, thread, thread

protected OOSpider(us.codecraft.webmagic.model.ModelPageProcessor modelPageProcessor)
public OOSpider(PageProcessor pageProcessor)
public OOSpider(Site site, PageModelPipeline pageModelPipeline, Class... pageModels)
Parameters:
site - site
pageModelPipeline - pageModelPipeline
pageModels - pageModels

protected CollectorPipeline getCollectorPipeline()
Overrides: getCollectorPipeline in class Spider

public static OOSpider create(Site site, PageModelPipeline pageModelPipeline, Class... pageModels)
public OOSpider addPageModel(PageModelPipeline pageModelPipeline, Class... pageModels)
public OOSpider setIsExtractLinks(boolean isExtractLinks)
Copyright © 2017. All rights reserved.