# Copyright (c) 2008 Yahoo! Inc. All rights reserved.
# Licensed under the Yahoo! Search BOSS Terms of Use
# (http://info.yahoo.com/legal/us/yahoo/search/bosstos/bosstos-2317.html)

__author__ = "Vik Singh (viksi@yahoo-inc.com)"

from threading import Thread
import urllib

import ui
from yos.util import text, console, dedup
from yos.boss.templates import publisher
from yos.boss.ysearch import search
from yos.crawl import rest
from yos.yql import db

class SearchThread(Thread):
  """Thread that calls one search function with one query and keeps the answer.

  s is the callable to invoke and q the single argument handed to it.
  ``results`` starts out as ``[]`` and is replaced by the return value of
  ``s(q)`` once ``run`` completes; if ``s`` raises, it keeps the initial ``[]``.
  """

  def __init__(self, s, q):
    super(SearchThread, self).__init__()
    self.s, self.q = s, q
    self.results = []

  def run(self):
    # Executed on the worker thread after start(); read results after join().
    search_fn, query = self.s, self.q
    self.results = search_fn(query)

def ns(q):
  """Query the search service for q with vertical="news" and count=30.

  The extra option ``orderby=date`` is forwarded through ``more``; the
  return value is whatever ``search`` returns.
  """
  extra_options = {"orderby": "date"}
  return search(q, vertical="news", count=30, more=extra_options)

def ts(q):
  """Load the Twitter search JSON for q (lang=en, rpp=30) via rest.load_json."""
  # quote_plus makes the query safe to embed in the URL's query string.
  encoded_query = urllib.quote_plus(q)
  url = "http://search.twitter.com/search.json?lang=en&q=%s&rpp=30" % encoded_query
  return rest.load_json(url)

def nearest(r):
  """Record the row's tweet as a neighbor when it resembles the news title.

  r holds one news/tweet pairing (keys "yn$title", "tt$text",
  "tt$from_user") plus a "neighbors" list.  When text.sim of the tweet text
  and the title is at least 0.24, a (user, tweet text) tuple is added to
  r["neighbors"].  r is returned either way.
  """
  score = text.sim(r["tt$text"], r["yn$title"])
  if score < 0.24:
    return r
  r["neighbors"] += [(r["tt$from_user"], r["tt$text"])]
  return r

def prep(r):
  """Set r["neighbors"] to a new empty list (replacing any old value); return r."""
  r["neighbors"] = list()
  return r

def rank(r):
  """Store the number of entries in r["all_neighbors"] under r["rank"]; return r."""
  neighbor_count = len(r["all_neighbors"])
  r["rank"] = neighbor_count
  return r

def addurls(s):
  """Wrap every URL token of s in an HTML anchor and return the new string.

  s is split on whitespace.  A token that begins with "http://" or
  "https://" becomes ``<a href="TOKEN">TOKEN</a>``; every other token is
  kept unchanged.  Tokens are re-joined with single spaces, so runs of
  whitespace in s collapse and an empty s yields "".

  NOTE(review): tokens are interpolated into the markup without any
  escaping -- confirm that callers only pass text that is already HTML-safe.
  """
  tokens = []
  for t in s.split():
    # A tuple of prefixes lets secure (https) links be hyperlinked as well.
    if t.startswith(("http://", "https://")):
      tokens.append("<a href=\"%s\">%s</a>" % (t, t))
    else:
      tokens.append(t)
  return " ".join(tokens)

def _story_fields(row):
  """Map one grouped news row onto the result-template placeholders."""
  return {
    "<?result_url?>": row["yn$url"],
    "<?result_title?>": row["yn$title"],
    "<?result_abstract?>": row["yn$abstract"],
    "<?result_dispurl?>": row["yn$date"] + " " + row["yn$time"],
    "<?result_source?>": row["yn$source"],
    "<?result_clickurl?>": row["yn$url"],
  }

def _tweet_items(neighbors):
  """Turn (user, message) pairs into item-template dicts, deduped on item text."""
  dn = dedup.Dedup(["<?item_text?>"])
  for u, msg in neighbors:
    dn.check( {"<?item_user?>": u, "<?item_text?>": addurls(msg), "<?item_link?>": "http://www.twitter.com/%s" % u} )
  return dn.results

def fresh(q):
  """Build the TweetNews results page for query q.

  The news search (ns) and the Twitter search (ts) run on two threads.
  Their results are loaded into tables named "yn" and "tt", crossed, and
  each pairing is tested by ``nearest``; matching tweets are collected per
  news title into "all_neighbors", and rows are ranked by how many tweets
  they collected.  Rows are then deduplicated on "yn$title" and rendered.

  Returns whatever ``ui.serp`` returns for the list of
  (story fields, tweet items) pairs.
  """
  nst = SearchThread(ns, q)
  nst.start()

  tst = SearchThread(ts, q)
  tst.start()

  # The news table is built while the Twitter request may still be running.
  nst.join()
  yn = db.create(name="yn", data=nst.results)

  tst.join()
  tt = db.create(name="tt", data=tst.results)

  tb = db.cross([yn, tt])
  tb = db.select(udf=prep, table=tb)
  tb = db.select(udf=nearest, table=tb)
  # "as" is a reserved word from Python 2.6 on, so writing as=... is a
  # SyntaxError there; unpacking a dict passes the same keyword argument
  # and parses on every interpreter version.
  tb = db.group(by=["yn$title"], key="neighbors", reducer=lambda x,y: x+y,
                norm=text.norm, table=tb, unique=False,
                **{"as": "all_neighbors"})
  tb = db.select(udf=rank, table=tb)
  tb = db.sort(key="rank", table=tb)

  dr = dedup.Dedup(["yn$title"])
  for row in tb.rows:
    dr.check(row)

  results = [(_story_fields(row), _tweet_items(row["all_neighbors"])) for row in dr.results]

  return ui.serp(q, "TweetNews Search '%s'" % q, "fresh?q=", results)
