QQ 新闻内容抓取(正则表达式)

简介:   qq 国内新闻 http://news.qq.com/c/816guonei_1.htm  http://news.qq.com/c/816guonei_2.htm                                                      ........   学校老师私开小卖部 带方便面进校遭食堂老板殴打 达州市渠县三汇中学王道明老师的妻子邓女士,在谈到丈夫与学校食堂承包商刘志勇,因携带方便面进校门发生纠纷继而被刘志勇打一事时,双眼恍惚,充满了恐惧。

 

qq 国内新闻

http://news.qq.com/c/816guonei_1.htm 

http://news.qq.com/c/816guonei_2.htm

                                                     ........

 

 
    <a target="_blank" class="pic" href="/a/20160430/004728.htm">
	<img class="picto" src="http://img1.gtimg.com/news/pics/hv1/130/103/2061/134042920_small.jpg"></a>
	<em class="f14 l24">
	<a target="_blank" class="linkto" href="/a/20160430/004728.htm">
	学校老师私开小卖部 带方便面进校遭食堂老板殴打
	</a>
	</em>
	<p class="l22">
	达州市渠县三汇中学王道明老师的妻子邓女士,在谈到丈夫与学校食堂承包商刘志勇,因携带方便面进校门发生纠纷继而被刘志勇打一事时,双眼恍惚,充满了恐惧。
    </p>
 

<a target=\"_blank\" class=\"pic\" href=\"([^\"]*)\">
<img class=\"picto\" src=\"([^\"]*)\">
</a>
<em class=\"f14 l24\">
<a target=\"_blank\" class=\"linkto\" href=\"[^\"]*\">
([^</a>]*)
</a>
</em>
<p class=\"l22\">
([^</p>]*)
</p>

 

demo 如图:

 

 

根据返回的页面内容进行正则匹配,提取列表布局所需的信息,并封装为 QQNewsItemEntity。

(图片展示不好,可以查看图片附件)



 

package com.curiousby.fitnessandappointment.utils;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.curiousby.fitnessandappointment.entity.QQNewsItemEntity;

/**
 * Extracts news list entries from the raw HTML of a QQ news list page.
 */
public class QQNewsMatcherPerformer {

	/**
	 * Matches one news entry and captures, in order: (1) the detail page
	 * href, (2) the thumbnail src, (3) the title text, (4) the summary text.
	 *
	 * <p>Whitespace between tags is optional, so both compact and
	 * pretty-printed markup match. The text captures use {@code [^<]*}:
	 * a class such as {@code [^</a>]} would exclude the single characters
	 * '<', '/', 'a' and '>' instead of stopping at the closing tag, and so
	 * would reject any title that merely contains the letter 'a'.
	 */
	private static final Pattern NEWS_ITEM_PATTERN = Pattern.compile(
			"<a target=\"_blank\" class=\"pic\" href=\"([^\"]*)\">\\s*"
			+ "<img class=\"picto\" src=\"([^\"]*)\">\\s*"
			+ "</a>\\s*"
			+ "<em class=\"f14 l24\">\\s*"
			+ "<a target=\"_blank\" class=\"linkto\" href=\"[^\"]*\">([^<]*)</a>\\s*"
			+ "</em>\\s*"
			+ "<p class=\"l22\">([^<]*)</p>");

	/**
	 * Parses every news entry found in the given page HTML.
	 *
	 * @param result raw HTML of a news list page; may be {@code null}
	 * @return the entries in document order, with surrounding whitespace
	 *         trimmed from each captured value; an empty list when
	 *         {@code result} is {@code null} or contains no entry
	 */
	public static List<QQNewsItemEntity> parseHtmlData(String result) {
		List<QQNewsItemEntity> list = new ArrayList<QQNewsItemEntity>();
		if (result == null) {
			return list;
		}
		Matcher matcher = NEWS_ITEM_PATTERN.matcher(result);
		while (matcher.find()) {
			QQNewsItemEntity model = new QQNewsItemEntity();
			model.setNewsDetailUrl(matcher.group(1).trim());
			model.setUrlImgAddress(matcher.group(2).trim());
			model.setNewsTitle(matcher.group(3).trim());
			model.setNewsSummary(matcher.group(4).trim());
			list.add(model);
		}
		return list;
	}
}

 

 

package com.curiousby.fitnessandappointment.entity;


/**
 * Plain data holder for one entry of the QQ news list.
 */
public class QQNewsItemEntity {

    /** Address of the news detail page. */
    private String newsDetailUrl;

    /** Address of the news image. */
    private String urlImgAddress;

    /** News title. */
    private String newsTitle;

    /** News summary. */
    private String newsSummary;

    /** @return the address of the news detail page */
    public String getNewsDetailUrl() {
        return this.newsDetailUrl;
    }

    /** @param newsDetailUrl the address of the news detail page */
    public void setNewsDetailUrl(String newsDetailUrl) {
        this.newsDetailUrl = newsDetailUrl;
    }

    /** @return the address of the news image */
    public String getUrlImgAddress() {
        return this.urlImgAddress;
    }

    /** @param urlImgAddress the address of the news image */
    public void setUrlImgAddress(String urlImgAddress) {
        this.urlImgAddress = urlImgAddress;
    }

    /** @return the news title */
    public String getNewsTitle() {
        return this.newsTitle;
    }

    /** @param newsTitle the news title */
    public void setNewsTitle(String newsTitle) {
        this.newsTitle = newsTitle;
    }

    /** @return the news summary */
    public String getNewsSummary() {
        return this.newsSummary;
    }

    /** @param newsSummary the news summary */
    public void setNewsSummary(String newsSummary) {
        this.newsSummary = newsSummary;
    }
}

 

 

package com.curiousby.fitnessandappointment.request.http;

import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.json.JSONObject;

import android.util.Log;

import com.android.volley.Request.Method;
import com.android.volley.NetworkResponse;
import com.android.volley.Response;
import com.android.volley.Response.ErrorListener;
import com.android.volley.Response.Listener;
import com.android.volley.VolleyError;
import com.android.volley.toolbox.HttpHeaderParser;
import com.android.volley.toolbox.StringRequest;
import com.android.volley.toolbox.Volley;
import com.curiousby.fitnessandappointment.MyApplication;
import com.curiousby.fitnessandappointment.constants.Constants;
import com.curiousby.fitnessandappointment.entity.FeedAllEntity;
import com.curiousby.fitnessandappointment.entity.FeedCommentEntity;
import com.curiousby.fitnessandappointment.entity.FeedZanEntity;
import com.curiousby.fitnessandappointment.entity.QueryResultJson;
import com.curiousby.fitnessandappointment.quote.volley.GsonRequest;
import com.curiousby.fitnessandappointment.request.dbmanager.QQNewsHttpManager;
import com.curiousby.fitnessandappointment.request.dbmanager.TrendHttpManager;
import com.curiousby.fitnessandappointment.utils.JSONUtil;
import com.curiousby.fitnessandappointment.utils.JsonParser;
import com.google.gson.JsonElement;

public class QQNewsHttpRequest {

	
	public  static void getQQNewsDate(final int page){
		String url = Constants.getQQNewsUrl(page);
		StringRequest request = new StringRequest(url, new Listener<String>() {
 
			@Override
			public void onResponse(String response) { 
				if (page ==1 )
					QQNewsHttpManager.getQQNewsByPageFirst(response);
				else
					QQNewsHttpManager.getQQNewsByPage(response);	
			}
		}, new ErrorListener() { 
			@Override
			public void onErrorResponse(VolleyError error) { 
				QQNewsHttpManager.getQQNewsByPageError();
			}
		})
	/*	{
			@Override
			protected Response<String> parseNetworkResponse(
					NetworkResponse response) {
				String str = null;
		        try {
		            str = new String(response.data,"utf-8");
		        } catch (UnsupportedEncodingException e) { 
		            e.printStackTrace();
		        }
		        return Response.success(str, HttpHeaderParser.parseCacheHeaders(response));
			}
		}*/
		;
		Volley.newRequestQueue(MyApplication.newInstance()).add(request); 
	}
	
}

 

 

package com.curiousby.fitnessandappointment.activity;

import java.util.ArrayList;
import java.util.List;

import android.content.Context;
import android.content.Intent;
import android.os.Bundle;
import android.util.Log;
import android.view.View;
import android.widget.AdapterView;
import android.widget.AdapterView.OnItemClickListener;

import com.curiousby.fitnessandappointment.MyApplication;
import com.curiousby.fitnessandappointment.R;
import com.curiousby.fitnessandappointment.adpter.QQNewsAdapter;
import com.curiousby.fitnessandappointment.constants.Constants;
import com.curiousby.fitnessandappointment.entity.QQNewsItemEntity;
import com.curiousby.fitnessandappointment.quote.xlistview.MsgListView;
import com.curiousby.fitnessandappointment.quote.xlistview.MsgListView.IXListViewListener;
import com.curiousby.fitnessandappointment.request.event.QQNewsStringHttpEvent;
import com.curiousby.fitnessandappointment.request.event.base.RequestEvent;
import com.curiousby.fitnessandappointment.request.http.QQNewsHttpRequest;
import com.curiousby.fitnessandappointment.utils.QQNewsMatcherPerformer;

import de.greenrobot.event.EventBus;

/**
 * Shows the QQ news list with pull-to-refresh and load-more paging, and opens
 * {@link QQNewsDetailActivity} when an entry is tapped.
 */
public class QQNewsActivity extends BaseActivity implements IXListViewListener ,OnItemClickListener{

	/** Bundle key under which the detail path of the tapped entry is passed on. */
	public final static String QQ_NEWS_DETAIL_URL = "qq_news_detail_url";

	private MsgListView mListView;
	private List<QQNewsItemEntity> mDataList;
	private QQNewsAdapter mAdapter;
	/** Page number of the most recently requested list page (per activity instance). */
	private int start = 1;
	private Context mContext;

	/**
	 * Builds the screen, subscribes to request events and requests page 1.
	 */
	@Override
	protected void onCreate(Bundle bundle) {
		super.onCreate(bundle);
		setContentView(R.layout.qqnews_list);
		this.mContext = QQNewsActivity.this;

		initUtils();
		initView();
		initListeners();

		EventBus.getDefault().register(this);
		start = 1;
		QQNewsHttpRequest.getQQNewsDate(start);
	}

	/** Creates the backing list and the adapter that renders it. */
	private void initUtils() {
		mDataList = new ArrayList<QQNewsItemEntity>();
		mAdapter = new QQNewsAdapter(mContext);
		mAdapter.setmDataList(mDataList);
	}

	/** Looks up the list view and attaches the adapter. */
	private void initView() {
		mListView = (MsgListView) this.findViewById(R.id.qq_news_list);
		mListView.setAdapter(mAdapter);
	}

	/** Enables pull-to-refresh / load-more and wires the list callbacks. */
	private void initListeners() {
		mListView.setPullLoadEnable(true);
		mListView.setPullRefreshEnable(true);
		mListView.setXListViewListener(this);
		// NOTE(review): the adapter is already attached in initView(); this
		// second call looks redundant - confirm against MsgListView before removing.
		mListView.setAdapter(mAdapter);
		mListView.setOnItemClickListener(this);
	}

	/** Restarts paging from page 1. */
	@Override
	public void onRefresh() {
		start = 1;
		mListView.setPullLoadEnable(true);
		mListView.setPullRefreshEnable(true);
		QQNewsHttpRequest.getQQNewsDate(start);
	}

	/** Requests the page after the one requested last. */
	@Override
	public void onLoadMore() {
		start += 1;
		QQNewsHttpRequest.getQQNewsDate(start);
	}

	@Override
	public void onDestroy() {
		EventBus.getDefault().unregister(this);
		super.onDestroy();
	}

	/**
	 * Receives request events. Only {@link QQNewsStringHttpEvent}s are
	 * handled: {@code HTTP_ERROR} stops the list animations and disables
	 * load-more, {@code HTTP_START} replaces the list content with the parsed
	 * page, and {@code HTTP_SUCCESS} appends the parsed page.
	 *
	 * @param requestEvent event posted on the event bus
	 */
	public void onEventMainThread(RequestEvent requestEvent) {
		if (!(requestEvent instanceof QQNewsStringHttpEvent)) {
			return;
		}
		QQNewsStringHttpEvent event = (QQNewsStringHttpEvent) requestEvent;
		switch (event.status) {
		case HTTP_ERROR:
			stopListAnimations();
			mListView.setPullLoadEnable(false);
			break;
		case HTTP_START: {
			// presumably posted for the first page - verify against QQNewsHttpManager
			stopListAnimations();
			String result = event.data;
			mAdapter.clearMDataList();
			List<QQNewsItemEntity> list = QQNewsMatcherPerformer.parseHtmlData(result);
			mAdapter.setmDataList(list);
			mAdapter.notifyDataSetChanged();
			break;
		}
		case HTTP_SUCCESS: {
			stopListAnimations();
			String result = event.data;
			List<QQNewsItemEntity> list = QQNewsMatcherPerformer.parseHtmlData(result);
			mAdapter.addMDataList(list);
			mAdapter.notifyDataSetChanged();
			break;
		}
		default:
			break;
		}
	}

	/** Ends both the refresh and the load-more indicator of the list. */
	private void stopListAnimations() {
		mListView.stopRefresh();
		mListView.stopLoadMore();
	}

	/**
	 * Opens the detail screen for the tapped entry. Taps that do not map to
	 * an adapter row are ignored instead of crashing with an
	 * {@link IndexOutOfBoundsException}.
	 */
	@Override
	public void onItemClick(AdapterView<?> parent, View view, int position, long id) {
		// List positions are shifted by one relative to adapter rows
		// (presumably MsgListView inserts one header row - TODO confirm).
		int index = position - 1;
		if (index < 0 || index >= mAdapter.getCount()) {
			return;
		}
		QQNewsItemEntity item = mAdapter.getItem(index);
		if (item == null) {
			return;
		}

		Log.e("baoyou", Constants.url_qq_news_base + item.getNewsDetailUrl());
		Bundle bundle = new Bundle();
		bundle.putString(QQNewsActivity.QQ_NEWS_DETAIL_URL, item.getNewsDetailUrl());

		Intent msgIntent = new Intent();
		msgIntent.putExtra("bundle", bundle);
		msgIntent.setClass(MyApplication.newInstance(), QQNewsDetailActivity.class);
		startActivityForResult(msgIntent, 1000);
	}

}

 

 

package com.curiousby.fitnessandappointment.activity;
 
import android.content.Intent;
import android.os.Bundle;
import android.util.Log;
import android.webkit.WebSettings;
import android.webkit.WebSettings.LayoutAlgorithm;
import android.webkit.WebView;
import android.webkit.WebViewClient;

import com.curiousby.fitnessandappointment.R;
import com.curiousby.fitnessandappointment.constants.Constants;

/**
 * Displays one news article in a {@link WebView}. The article path is read
 * from the bundle stored under the "bundle" extra of the launching intent.
 */
public class QQNewsDetailActivity extends BaseActivity  {

	private WebView webView;
	/** Page to load; keeps this fallback when the intent carries no article path. */
	private String url = "https://www.baidu.com";

	@Override
	protected void onCreate(Bundle bundle) {
		super.onCreate(bundle);
		setContentView(R.layout.qqnews_list_item_detail);

		initUtils();
		initView();
		initListeners();
	}

	/**
	 * Installs the page callbacks, shows the waiting dialog and starts
	 * loading. The client is installed before the load begins, and the
	 * dialog is dismissed when a page finishes loading, so it also closes
	 * on a plain load that triggers no URL override.
	 */
	private void initListeners() {
		webView.setWebViewClient(new WebViewClient() {
			@Override
			public boolean shouldOverrideUrlLoading(WebView view, String url) {
				// Keep navigation inside this WebView.
				view.loadUrl(url);
				return true;
			}

			@Override
			public void onPageFinished(WebView view, String url) {
				super.onPageFinished(view, url);
				dismissWaitingDialog();
			}
		});
		showWaitingDialog("");
		webView.loadUrl(url);
	}

	/** Looks up the WebView and applies its settings. */
	private void initView() {
		webView = (WebView) this.findViewById(R.id.wv_qq_news_item_detail_webview);
		WebSettings settings = webView.getSettings();
		settings.setJavaScriptEnabled(true);  // enable JavaScript
		settings.setUseWideViewPort(false);   // wide viewport disabled
		settings.setSupportZoom(true);        // allow zooming
		settings.setLayoutAlgorithm(LayoutAlgorithm.SINGLE_COLUMN);
	}

	/**
	 * Resolves the article URL from the launching intent. When the intent,
	 * its bundle or the path is missing, the fallback value of {@code url}
	 * is kept instead of failing with a NullPointerException or building a
	 * URL that ends in "null".
	 */
	private void initUtils() {
		Intent intent = getIntent();
		Bundle bundle = (intent != null) ? intent.getBundleExtra("bundle") : null;
		if (bundle == null) {
			return;
		}
		String path = bundle.getString(QQNewsActivity.QQ_NEWS_DETAIL_URL);
		if (path != null) {
			url = Constants.url_qq_news_base + path;
		}
	}

}

 

package com.curiousby.fitnessandappointment.adpter;

import java.util.ArrayList;
import java.util.List;

import android.content.Context;
import android.content.res.Resources;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.view.LayoutInflater;
import android.view.View;
import android.view.ViewGroup;
import android.widget.BaseAdapter;
import android.widget.ImageView;
import android.widget.TextView;

import com.curiousby.fitnessandappointment.R;
import com.curiousby.fitnessandappointment.entity.QQNewsItemEntity;
import com.curiousby.fitnessandappointment.quote.volley.ImageCacheManager;

/**
 * List adapter that renders {@link QQNewsItemEntity} rows (image, title and
 * summary) using the {@code qqnews_list_item} layout.
 */
public class QQNewsAdapter extends BaseAdapter{

	private LayoutInflater mInflater;
	/** Rows to display; never {@code null}. */
	private List<QQNewsItemEntity> mDataList = new ArrayList<QQNewsItemEntity>();
	private Context mContext;
	/** Launcher icon handed to the image loader as placeholder; decoded once on first use. */
	private Bitmap mPlaceholder;

	/**
	 * @param mContext context used to inflate rows and resolve resources
	 */
	public QQNewsAdapter(Context mContext) {
		this.mContext = mContext;
		this.mInflater = LayoutInflater.from(mContext);
	}

	/** @return the list currently backing this adapter */
	public List<QQNewsItemEntity> getmDataList() {
		return mDataList;
	}

	/**
	 * Replaces the backing list. The given list is used directly (not
	 * copied); {@code null} is treated as an empty list so later add/clear
	 * calls cannot fail.
	 *
	 * @param mDataList new rows, may be {@code null}
	 */
	public void setmDataList(List<QQNewsItemEntity> mDataList) {
		this.mDataList = (mDataList != null) ? mDataList : new ArrayList<QQNewsItemEntity>();
	}

	/**
	 * Appends rows to the backing list.
	 *
	 * @param mDataList rows to append; {@code null} is ignored
	 */
	public void addMDataList(List<QQNewsItemEntity> mDataList) {
		if (mDataList != null) {
			this.mDataList.addAll(mDataList);
		}
	}

	/** Removes every row from the backing list. */
	public void clearMDataList( ) {
		this.mDataList.clear();
	}

	@Override
	public int getCount() {
		return mDataList.size();
	}

	/**
	 * @param position adapter row index
	 * @return the row at {@code position}, or {@code null} when the index is
	 *         outside the current list
	 */
	@Override
	public QQNewsItemEntity getItem(int position) {
		if (position < 0 || position >= mDataList.size()) {
			return null;
		}
		return mDataList.get(position);
	}

	/** Uses the row index as id so that different rows report different ids. */
	@Override
	public long getItemId(int position) {
		return position;
	}

	/**
	 * Binds the row at {@code position}, reusing {@code convertView} when it
	 * already carries a {@link ViewHolder}.
	 */
	@Override
	public View getView(int position, View convertView, ViewGroup parent) {
		ViewHolder viewHolder;
		if (convertView == null || convertView.getTag() == null) {
			convertView = mInflater.inflate(R.layout.qqnews_list_item, parent, false);
			viewHolder = new ViewHolder(convertView);
			convertView.setTag(viewHolder);
		} else {
			viewHolder = (ViewHolder) convertView.getTag();
		}

		QQNewsItemEntity qqnie = getItem(position);
		if (qqnie != null) {
			Bitmap placeholder = getPlaceholder();
			ImageCacheManager.loadImage(qqnie.getUrlImgAddress(), viewHolder.qqnewspic, placeholder, placeholder);
			viewHolder.qqnewstitle.setText(qqnie.getNewsTitle());
			viewHolder.qqnewscontent.setText(qqnie.getNewsSummary());
		}
		return convertView;
	}

	/** Returns the placeholder bitmap, decoding it only on the first call. */
	private Bitmap getPlaceholder() {
		if (mPlaceholder == null) {
			mPlaceholder = getBitmapFromRes(R.drawable.ic_launcher);
		}
		return mPlaceholder;
	}

	/** Caches the child views of one row; static so it holds no adapter reference. */
	static class ViewHolder{
		public ImageView qqnewspic;
		public TextView  qqnewstitle;
		public TextView  qqnewscontent;

		public ViewHolder(View baseView) {
			this.qqnewspic = (ImageView) baseView.findViewById(R.id.qq_news_image_link);
			this.qqnewstitle = (TextView) baseView.findViewById(R.id.qq_news_txt_title);
			this.qqnewscontent = (TextView) baseView.findViewById(R.id.qq_news_txt_summary);
		}
	}

	/**
	 * Decodes a drawable resource into a bitmap.
	 *
	 * @param resId drawable resource id
	 * @return the result of {@link BitmapFactory#decodeResource}
	 */
	public Bitmap getBitmapFromRes(int resId) {
		Resources res = mContext.getResources();
		return BitmapFactory.decodeResource(res, resId);
	}

}

 

<?xml version="1.0" encoding="utf-8"?>
<!-- One row of the news list: image on the left, title and summary to its
     right, and a thin separator line pinned to the bottom of the row. -->
<RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="match_parent"
    android:layout_height="wrap_content"
    android:padding="1dp">

    <!-- News image; the adapter loads the picture into this view. -->
    <ImageView
        android:id="@+id/qq_news_image_link"
        android:layout_width="80dp"
        android:layout_height="wrap_content"
        android:scaleType="centerCrop"
        android:layout_centerVertical="true"
        android:layout_above="@+id/v_qq_news_line"
        android:background="@drawable/ic_launcher"  
        />

    <LinearLayout
        android:layout_marginLeft="10dp"
        android:id="@+id/ll_qq_news_contentandhead"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:layout_toRightOf="@+id/qq_news_image_link" 
        android:orientation="vertical">

        <!-- Text sizes use sp so they follow the user's font-size setting. -->
        <TextView
            android:id="@+id/qq_news_txt_title"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:text="标题"
            android:textSize="16sp" />

        <TextView
            android:layout_marginTop="5dp"
            android:id="@+id/qq_news_txt_summary"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:text="内容"
            android:layout_marginBottom="3dp" 
            android:textSize="12sp" />
    </LinearLayout>

    <!-- Separator line at the bottom of the row. -->
	<View 
	    android:id="@+id/v_qq_news_line"
        android:layout_width="match_parent"
        android:layout_height="1dp"  
        android:layout_marginBottom="3dp" 
        android:layout_marginTop="5dp" 
        android:background="@color/grey"
        android:layout_alignParentBottom="true"/>
</RelativeLayout>

 

<?xml version="1.0" encoding="utf-8"?>
<!-- News list screen: a title bar at the top and the pull-to-refresh list
     filling the space below it. -->
<RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    >

     <!-- Title bar. -->
     <RelativeLayout
         android:id="@+id/rl_qq_news_head"
        android:layout_width="match_parent"
        android:layout_height="wrap_content"
        android:background="@drawable/title_bar"
        android:gravity="center_vertical"
        android:padding="3dp" 
         android:layout_alignParentTop="true"
        >

        <TextView
            android:id="@+id/tv_qq_news_head_title"
            android:layout_width="match_parent"
            android:layout_height="wrap_content"
            android:gravity="center"
            android:text="新闻"
            android:textColor="#FFF"
            android:textSize="20sp"
            android:textStyle="bold" />
    </RelativeLayout> 

    <!-- News list, placed below the title bar. -->
    <com.curiousby.fitnessandappointment.quote.xlistview.MsgListView
            android:id="@+id/qq_news_list" 
       		 android:layout_below="@id/rl_qq_news_head"
        	android:background="@color/white"
            android:layout_width="match_parent"
            android:layout_height="match_parent" 
            android:divider="@null"> 
   </com.curiousby.fitnessandappointment.quote.xlistview.MsgListView>

</RelativeLayout>

 

 

 

 

package com.curiousby.fitnessandappointment.constants;

import java.util.ArrayList;
import java.util.List;

import com.curiousby.fitnessandappointment.R;
import com.curiousby.fitnessandappointment.entity.GridEntity;

/**
 * Application-wide constants: backend endpoints, QQ news URLs and the
 * entries of the home grid.
 */
public class Constants {

	/** Base address of the backend server. */
	public final static String BASEIP = "http://127.0.0.1:8080";

	/** Path prefix shared by every backend REST endpoint below. */
	private final static String REST_ROOT = BASEIP + "/RepositoryPic/WebService/rest";

	public final static String login_url = REST_ROOT + "/user/login";
	public final static String friendship_url = REST_ROOT + "/friendShip/friendShips/";
	public final static String url_addDeviceToTag = REST_ROOT + "/pushService/addDevicesToTag";
	public final static String url_post_sendMessageToTag = REST_ROOT + "/pushService/sendMessageToTag";
	public final static String url_feed = REST_ROOT + "/feedAll/feedAlls/user";
	public final static String url_add_feed_comment = REST_ROOT + "/feedAll/feedCommemtAdd";
	public final static String url_add_feed_zan = REST_ROOT + "/feedAll/feedZanAdd";
	public final static String url_feed_create = REST_ROOT + "/feedAll/createFeed";

	/** Host that serves the QQ news pages. */
	public final static String url_qq_news_base = "http://news.qq.com";

	/** URL template of a domestic news list page; "?" stands for the page number. */
	public static String url_qq_news_guonei = url_qq_news_base + "/c/816guonei_?.htm";

	/**
	 * Builds the URL of one news list page.
	 *
	 * @param page page number that replaces the "?" of the template
	 * @return the list page URL
	 */
	public static String getQQNewsUrl(int page) {
		String pageNumber = String.valueOf(page);
		return url_qq_news_guonei.replace("?", pageNumber);
	}

	/**
	 * Creates the entries of the home grid.
	 *
	 * @return a new list holding the three grid entries, in display order
	 */
	public static List<GridEntity> getGridItem() {
		List<GridEntity> entries = new ArrayList<GridEntity>();

		GridEntity trend = new GridEntity(1, "动态", R.drawable.mian_trend_grid_trend, 1);
		entries.add(trend);

		GridEntity circle = new GridEntity(2, "悦动圈", R.drawable.mian_trend_grid_yuedongquan, 2);
		entries.add(circle);

		GridEntity news = new GridEntity(3, "新闻", R.drawable.qq_ketang, 3);
		entries.add(news);

		return entries;
	}

}

 

 

 

 

 

 

 

 

 

 

 

 

 

捐助开发者

在兴趣的驱动下,写一个免费的东西,有欣喜,也还有汗水,希望你喜欢我的作品,同时也能支持一下。 当然,有钱捧个钱场(右上角的爱心标志,支持支付宝和PayPal捐助),没钱捧个人场,谢谢各位。



 
 
 谢谢您的赞助,我会做的更好!

 

 

 

目录
相关文章
|
8月前
|
XML 数据采集 Web App开发
XPath数据提取与贴吧爬虫应用示例
XPath(XML Path Language)是一种用于在XML文档中定位和选择节点的语言。它提供了一种简洁的方式来遍历和提取XML文档中的数据。
75 1
Typecho-handsome主题统计访客人次
本次增强是基于handsome6.0版本的一个右侧栏博客信息的增强,如下图所示,初始的handsome主题博客信息栏只有四个栏目信息————分别是:文章数目,评论数目,运行天数,最后活动。那么该如何修改源代码来实现增加多个栏目呢?本次以添加一个记录网站总浏览人次的功能为例
184 0
Typecho-handsome主题统计访客人次
|
数据采集 Python
Python爬虫:使用newspaper解析新闻页面信息
Python爬虫:使用newspaper解析新闻页面信息
336 0
|
Web App开发 搜索推荐 .NET
识别真假搜索引擎(搜索蜘蛛)方法(baidu,google,Msn,sogou,soso等)
http://www.useragentstring.com/pages/useragentstring.php   今天分析研究了两个网站的 Apache 日志,分析日志虽然很无聊,但却是很有意义的事情,比如跟踪 SPAM 的 User Agent。
3451 0
|
算法
百度搜索技巧,精确搜索,搜索指定标题、内容、网址,黑语法搜索入门
百度搜索技巧,精确搜索,搜索指定标题、内容、网址,黑语法搜索入门
219 0
百度搜索技巧,精确搜索,搜索指定标题、内容、网址,黑语法搜索入门
去除csdn广告的方法,多种方法比较总结
1.安装各种博客分享的扩展程序,去除csdn广告
738 0
去除csdn广告的方法,多种方法比较总结
|
JSON 数据格式
爬取微博评论内容
网友:看看胸女:滚 网友:美胸比赛女:[图片消息] ​​​​ 继上次知乎话题 拥有一副好身材是怎样的体验? 解析了知乎回答内容之后,这次我们来解析一下微博内容,以微博网友发起的美胸大赛为例: https://m.weibo.cn/detail/4367970740108457 https://m.weibo.cn/detail/4348022520956497 这就是本次要获取的微博图片内容,共计672张很凶的照片: 下面是讲如何获取的,不感兴趣的话直接去公众号回复 套图 即可获得。
2157 0