public class OOSpider<T> extends Spider
@TargetUrl("http://my.oschina.net/flashsword/blog/\\d+")
public class OschinaBlog {

    @ExtractBy("//title")
    private String title;

    @ExtractBy(value = "div.BlogContent", type = ExtractBy.Type.Css)
    private String content;

    @ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true)
    private List<String> tags;
}

And start the spider by:
OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog"),
        new JsonFilePageModelPipeline(), OschinaBlog.class).run();
Spider.Status
destroyWhenExit, downloader, executorService, exitWhenComplete, logger, pageProcessor, pipelines, scheduler, site, spawnUrl, startRequests, stat, STAT_INIT, STAT_RUNNING, STAT_STOPPED, threadNum, threadPool, uuid
Modifier | Constructor and Description |
---|---|
protected |
OOSpider(us.codecraft.webmagic.model.ModelPageProcessor modelPageProcessor) |
|
OOSpider(PageProcessor pageProcessor) |
|
OOSpider(Site site,
PageModelPipeline pageModelPipeline,
Class... pageModels)
create a spider
|
Modifier and Type | Method and Description |
---|---|
OOSpider |
addPageModel(PageModelPipeline pageModelPipeline,
Class... pageModels) |
static OOSpider |
create(Site site,
Class... pageModels) |
static OOSpider |
create(Site site,
PageModelPipeline pageModelPipeline,
Class... pageModels) |
protected CollectorPipeline |
getCollectorPipeline() |
OOSpider |
setIsExtractLinks(boolean isExtractLinks) |
addPipeline, addRequest, addUrl, checkIfRunning, clearPipeline, close, create, downloader, extractAndAddRequests, get, getAll, getPageCount, getScheduler, getSite, getSpiderListeners, getStartTime, getStatus, getThreadAlive, getUUID, initComponent, isExitWhenComplete, isSpawnUrl, onError, onSuccess, pipeline, run, runAsync, scheduler, setDownloader, setEmptySleepTime, setExecutorService, setExitWhenComplete, setPipelines, setScheduler, setSpawnUrl, setSpiderListeners, setUUID, sleep, start, startRequest, startUrls, stop, test, thread, thread
protected OOSpider(us.codecraft.webmagic.model.ModelPageProcessor modelPageProcessor)
public OOSpider(PageProcessor pageProcessor)
public OOSpider(Site site, PageModelPipeline pageModelPipeline, Class... pageModels)
site - site
pageModelPipeline - pageModelPipeline
pageModels - pageModels
protected CollectorPipeline getCollectorPipeline()
Overrides: getCollectorPipeline in class Spider
public static OOSpider create(Site site, PageModelPipeline pageModelPipeline, Class... pageModels)
public OOSpider addPageModel(PageModelPipeline pageModelPipeline, Class... pageModels)
public OOSpider setIsExtractLinks(boolean isExtractLinks)
Copyright © 2017. All rights reserved.