Tuesday, December 10, 2013

Converting String to Integer without using a built in method

Converting String to Integer without using a built in method




public class TestStringtoInt {

    /**
     * Converts a numeric string to an int without using Integer.parseInt.
     * Supports an optional leading '-' or '+' sign.
     *
     * @param s the string to convert; an optional sign followed by one or
     *          more decimal digits
     * @return the integer value represented by {@code s}
     * @throws NumberFormatException if {@code s} is null, empty, has a sign
     *         with no digits, or contains a non-digit character
     */
    public static int stringToInt(String s) {
        if (s == null || s.isEmpty()) {
            throw new NumberFormatException("empty input");
        }
        int i = 0;
        boolean negative = false;
        char first = s.charAt(0);
        if (first == '-' || first == '+') {
            negative = (first == '-');
            i++;
            if (i == s.length()) {
                throw new NumberFormatException("no digits in \"" + s + "\"");
            }
        }
        int result = 0;
        for (; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c < '0' || c > '9') {
                throw new NumberFormatException(
                        "invalid character '" + c + "' in \"" + s + "\"");
            }
            // Shift the accumulated value one decimal place and add the digit.
            result = result * 10 + (c - '0');
        }
        return negative ? -result : result;
    }

    public static void main(String[] args) {
        // The original demo used "-s234.23": '-', 's' and '.' are not digits,
        // so (char - '0') silently produced garbage. Invalid input now fails
        // fast with NumberFormatException instead.
        System.out.println(stringToInt("-234"));
    }
}

Object cloning Example in Java: create a list from existing list and have the new list modified for a single attribute.

Use case

You have a Book Class with several attributes, and you have to create a list from existing list and have the new list modified for a single attribute.



/**
 * Simple value holder for a book. Implements {@link Cloneable} so an existing
 * list of books can be duplicated and the copies modified independently of
 * the originals.
 */
public class Book implements Cloneable {

    private String bookName;
    private String bookAuthor;
    private String bookId;

    /**
     * Creates a fully initialised book.
     *
     * @param bookAuthor the author of the book
     * @param bookName   the title of the book
     * @param bookId     the identifier of the book
     */
    public Book(String bookAuthor, String bookName, String bookId) {
        this.bookAuthor = bookAuthor;
        this.bookName = bookName;
        this.bookId = bookId;
    }

    /** Creates an empty book; populate it via the setters. */
    public Book() {
    }

    public String getBookName() {
        return bookName;
    }

    public void setBookName(String bookName) {
        this.bookName = bookName;
    }

    public String getBookAuthor() {
        return bookAuthor;
    }

    public void setBookAuthor(String bookAuthor) {
        this.bookAuthor = bookAuthor;
    }

    public String getBookId() {
        return bookId;
    }

    public void setBookId(String bookId) {
        this.bookId = bookId;
    }

    /**
     * Returns a shallow copy of this book. A shallow copy is sufficient
     * because every field is an immutable {@link String}.
     *
     * <p>Widened from {@code protected Object} to {@code public Book}
     * (covariant return) so callers outside the package can clone without a
     * cast; widening access is backward compatible for existing callers.
     *
     * @return a new {@code Book} with the same field values
     * @throws CloneNotSupportedException declared for API compatibility; not
     *         thrown in practice since this class implements Cloneable
     */
    @Override
    public Book clone() throws CloneNotSupportedException {
        return (Book) super.clone();
    }
}

import java.util.ArrayList;
import java.util.Iterator;


public class TestFunctionality {
    /**
     * Demonstrates object cloning: builds a list of books, clones each element
     * into a second list, changes the author only on the clones, then prints
     * the size of the new list and the modified authors. The originals keep
     * their authors untouched.
     **/
    public static void main(String[] arg) {
        ArrayList<Book> originals = new ArrayList<Book>();
        originals.add(new Book("Sachi", "AA123", "CoreJava"));
        originals.add(new Book("Sachii", "AA1234", "CoreJavaa"));
        originals.add(new Book("Sachiii", "AA12345", "CoreJavaaaa"));

        ArrayList<Book> clones = new ArrayList<Book>();
        for (Book original : originals) {
            try {
                Book copy = (Book) original.clone();
                System.out.println(copy.getBookAuthor());
                copy.setBookAuthor("SACH");
                clones.add(copy);
            } catch (CloneNotSupportedException e) {
                e.printStackTrace();
            }
        }

        System.out.println("Length of the new list is " + clones.size());
        for (Book copy : clones) {
            System.out.println(copy.getBookAuthor());
        }
    }
}



Linked list implementation In Java

Linked list implementation In Java 

package sachi.test.datastructures;

import java.util.Iterator;
import java.util.NoSuchElementException;

public class LinkedList<AnyType> implements Iterable<AnyType>
{
   // First node of the chain; null means the list is empty.
   private Node<AnyType> head;

 /**
   *  Constructs an empty list.
   */
   public LinkedList()
   {
      head = null;
   }
 /**
   *  Returns true if the list is empty.
   */
   public boolean isEmpty()
   {
      return head == null;
   }
 /**
   *  Inserts a new node at the beginning of this list. O(1).
   */
   public void addFirst(AnyType item)
   {
      head = new Node<AnyType>(item, head);
   }
 /**
   *  Returns the first element in the list.
   *
   *  @throws NoSuchElementException if the list is empty
   */
   public AnyType getFirst()
   {
      if(head == null) throw new NoSuchElementException();

      return head.data;
   }
 /**
   *  Removes and returns the first element in the list.
   *
   *  @throws NoSuchElementException if the list is empty
   */
   public AnyType removeFirst()
   {
      AnyType tmp = getFirst();
      head = head.next;
      return tmp;
   }
 /**
   *  Appends a new node to the end of this list.
   *  O(n): no tail reference is kept, so the chain is walked each time.
   */
   public void addLast(AnyType item)
   {
      if( head == null)
         addFirst(item);
      else
      {
         Node<AnyType> tmp = head;
         while(tmp.next != null) tmp = tmp.next;

         tmp.next = new Node<AnyType>(item, null);
      }
   }
 /**
   *  Returns the last element in the list. O(n).
   *
   *  @throws NoSuchElementException if the list is empty
   */
   public AnyType getLast()
   {
      if(head == null) throw new NoSuchElementException();

      Node<AnyType> tmp = head;
      while(tmp.next != null) tmp = tmp.next;

      return tmp.data;
   }
 /**
   *  Removes all nodes from the list; the old chain becomes garbage.
   */
   public void clear()
   {
      head = null;
   }
 /**
   *  Returns true if this list contains the specified element
   *  (compared with equals()).
   */
   public boolean contains(AnyType x)
   {
      for(AnyType tmp : this)
         if(tmp.equals(x)) return true;

      return false;
   }
 /**
   *  Returns the data at the specified position in the list.
   *
   *  Fixed: the original threw NullPointerException when pos exceeded the
   *  size by more than one, and silently returned the first element for a
   *  negative pos; both now throw IndexOutOfBoundsException.
   *
   *  @throws IndexOutOfBoundsException if pos is negative or >= size
   */
   public AnyType get(int pos)
   {
      if (pos < 0 || head == null) throw new IndexOutOfBoundsException();

      Node<AnyType> tmp = head;
      for (int k = 0; k < pos; k++)
      {
         tmp = tmp.next;
         // Walked off the end of the chain before reaching pos.
         if (tmp == null) throw new IndexOutOfBoundsException();
      }

      return tmp.data;
   }
 /**
   *  Returns a string representation: each element followed by a space
   *  (note the trailing space, kept for backward compatibility).
   */
   public String toString()
   {
      // StringBuilder: unsynchronized, preferred over StringBuffer here since
      // the builder never escapes this method.
      StringBuilder result = new StringBuilder();
      for(AnyType x : this)
         result.append(x + " ");

      return result.toString();
   }
 /**
   *  Inserts a new node after the first node containing the key.
   *  Does nothing if the key is not found.
   */
   public void insertAfter(AnyType key, AnyType toInsert)
   {
      Node<AnyType> tmp = head;

      while(tmp != null && !tmp.data.equals(key)) tmp = tmp.next;

      if(tmp != null)
         tmp.next = new Node<AnyType>(toInsert, tmp.next);
   }
 /**
   *  Inserts a new node before the first node containing the key.
   *  Does nothing if the list is empty or the key is not found.
   */
   public void insertBefore(AnyType key, AnyType toInsert)
   {
      if(head == null) return;

      // Key at the head: the new node becomes the new head.
      if(head.data.equals(key))
      {
         addFirst(toInsert);
         return;
      }

      Node<AnyType> prev = null;
      Node<AnyType> cur = head;

      while(cur != null && !cur.data.equals(key))
      {
         prev = cur;
         cur = cur.next;
      }
      // Insert between prev and cur (cur holds the key).
      if(cur != null)
         prev.next = new Node<AnyType>(toInsert, cur);
   }
 /**
   *  Removes the first occurrence of the specified element in this list.
   *
   *  @throws RuntimeException if the list is empty or the key is not found
   */
   public void remove(AnyType key)
   {
      if(head == null)
         throw new RuntimeException("cannot delete");

      if( head.data.equals(key) )
      {
         head = head.next;
         return;
      }

      Node<AnyType> cur  = head;
      Node<AnyType> prev = null;

      while(cur != null && !cur.data.equals(key) )
      {
         prev = cur;
         cur = cur.next;
      }

      if(cur == null)
         throw new RuntimeException("cannot delete");

      // Unlink cur from the chain.
      prev.next = cur.next;
   }
 /**
   *  Returns a copy of the list (the node chain is duplicated; the element
   *  references themselves are shared, so this is a shallow copy).
   *  Complexity: O(n^2) because addLast walks the whole twin each time.
   */
   public  LinkedList<AnyType> copy1()
   {
      LinkedList<AnyType> twin = new LinkedList<AnyType>();
      Node<AnyType> tmp = head;
      while(tmp != null)
      {
         twin.addLast( tmp.data );
         tmp = tmp.next;
      }

      return twin;
   }
 /**
   *  Returns a copy of the list (node chain duplicated, elements shared).
   *  Complexity: O(n) — builds the twin reversed, then reverses it.
   */
   public LinkedList<AnyType> copy2()
   {
      LinkedList<AnyType> twin = new LinkedList<AnyType>();
      Node<AnyType> tmp = head;
      while(tmp != null)
      {
         twin.addFirst( tmp.data );
         tmp = tmp.next;
      }

      return twin.reverse();
   }
 /**
   *  Returns a new list with the elements in reverse order.
   *  This list is left unmodified. Complexity: O(n).
   */
   public LinkedList<AnyType> reverse()
   {
      LinkedList<AnyType> list = new LinkedList<AnyType>();
      Node<AnyType> tmp = head;
      while(tmp != null)
      {
         list.addFirst( tmp.data );
         tmp = tmp.next;
      }
      return list;
   }
 /**
   *  Returns a copy of the list (node chain duplicated, elements shared),
   *  using a tail reference on the twin. Complexity: O(n).
   *
   *  Fixed: the original returned null for an empty list; it now returns an
   *  empty list, consistent with copy1() and copy2().
   */
   public LinkedList<AnyType> copy3()
   {
      LinkedList<AnyType> twin = new LinkedList<AnyType>();
      if(head==null) return twin;
      Node<AnyType> tmp = head;
      twin.head = new Node<AnyType>(head.data, null);
      Node<AnyType> tmpTwin = twin.head;
      while(tmp.next != null)
      {
         tmp = tmp.next;
         tmpTwin.next = new Node<AnyType>(tmp.data, null);
         tmpTwin = tmpTwin.next;
      }

      return twin;
   }

 /*******************************************************
  *  The Node class.
  *
  *  Helper class used to implement the list chain. As this is a private
  *  helper class it is acceptable to expose its fields to the enclosing
  *  class. Instances are never made available to client code of the list.
  *  Declared static: it needs no reference to the enclosing list.
  ********************************************************/
   private static class Node<AnyType>
   {
      private AnyType data;
      private Node<AnyType> next;

      public Node(AnyType data, Node<AnyType> next)
      {
         this.data = data;
         this.next = next;
      }
   }

 /*******************************************************
  *  The Iterator class.
  *
  *  Allows each list element to be visited in sequence. The iterator class
  *  is nested in the list class and is non-static, so it has access to the
  *  state (head) of the list object being iterated. Clients obtain
  *  instances only through iterator().
  ********************************************************/

   public Iterator<AnyType> iterator()
   {
      return new LinkedListIterator();
   }

   private class LinkedListIterator  implements Iterator<AnyType>
   {
      // The node whose data the next call to next() will return.
      private Node<AnyType> nextNode;

      public LinkedListIterator()
      {
         nextNode = head;
      }

      public boolean hasNext()
      {
         return nextNode != null;
      }

      public AnyType next()
      {
         if (!hasNext()) throw new NoSuchElementException();
         AnyType res = nextNode.data;
         nextNode = nextNode.next;
         return res;
      }

      // Removal through the iterator is intentionally unsupported.
      public void remove() { throw new UnsupportedOperationException(); }
   }

 /*****   main() for testing and debugging   *****/

   public static void main(String[] args)
   {
      LinkedList<String> list = new LinkedList <String>();
      list.addFirst("p");
      list.addFirst("a");
      list.addFirst("e");
      list.addFirst("h");
      System.out.println(list);
      list.insertAfter("e","f");
      System.out.println(list);

      LinkedList<String> twin = list.copy3();
      System.out.println(twin);

      System.out.println(list.get(0));

      list.addLast("s");
      // Typed iterator instead of the raw Iterator the original used.
      Iterator<String> itr = list.iterator();
      while(itr.hasNext())
         System.out.print(itr.next() + " ");
      System.out.println();

      for(Object x : list)
         System.out.print(x + " ");
      System.out.println();

      list.insertAfter("e", "ee");
      System.out.println(list);
      System.out.println(list.getLast());

      list.insertBefore("h", "yy");
      System.out.println(list);

      list.remove("p");
      System.out.println(list);
   }
}







Monday, December 9, 2013

Data Consistency in Cassandra

Understanding Data Consistency in Cassandra

To understand the data consistency in Cassandra we need to understand the points below.

1. Overview Cassandra reads/writes.
2. Details of how writes are performed in Cassandra.
3. The CAP theorem.
4. Tunable data consistency
5. Choosing a data consistency strategy for writes.
6. Choosing a data consistency strategy for  reads.
7. Example.


-Overview Cassandra reads/writes.

Cassandra has a peer-to-peer architecture; it does not have the concept of a master or a slave. All the nodes in a cluster — whether in different racks or even different datacenters — are treated the same, so data can be written to any node and read from any node. Cassandra automatically takes care of partitioning and replicating the data throughout the cluster, across racks and datacenters.


-Details of how writes are performed in Cassandra.

Writes in Cassandra are considered to be extremely fast in the industry today. When data is written to Cassandra, the data is persisted in a commit log for durability. The same data is then moved to an in-memory table called the memtable. Once the memtable is full, the data is moved to disk in files called SSTables. Writes in Cassandra are atomic at the row level: all columns are written or updated, or none are. RDBMS-style transactions are not supported. Based on an independent benchmark, it has been recorded that Cassandra has
a. 4x better writes.
b. 2x better reads.
c. 12x better in reads/updates.


-The CAP theorem

In distributed database system you can have two of three things.

-You can have strong consistency, which means always reading and writing the latest copy of the data.
-You can have strong availability of the data, which means that if one node holding the data goes down, other nodes still hold the data to serve the request.
-You can lose messages between a couple of the nodes and the system still operates well (partition tolerance).

Cassandra is known for having strong availability and partition tolerance, but it provides tunable data consistency.


-Tunable data consistency

In Cassandra you have the flexibility to choose the data consistency, strong or eventual. The data consistency can be defined in Cassandra per operation basis.

-Data consistency strategy for writes

There are various strategy for writes.
1. Any: A write must succeed on any available node.
2. One: A write must succeed on any node responsible for that row.
3. Quorum: A write must succeed on a quorum of replica nodes which is determined by (replicationFactor/2)+1.
4. Local_Quorum: A write must succeed on a quorum of replica nodes in the same datacenter as the coordinator node.
5. Each_Quorum: A write must succeed on a quorum of replica nodes in all data centers.
6. All: A write must succeed on all replica nodes for a row key.


-Hinted Handoffs

Hinted Handoff is a mechanism Cassandra uses when performing writes to a row across all replicas for that row. If some replica nodes are not available, a hint is stored on one of the available nodes so it can update the downed nodes with the row once they come back up. If no replica nodes are available, using the Any consistency level will instruct the coordinator node to store the hint and the row data, which is passed to a replica node once it becomes available.


-Data consistency strategy for Reads

There are various strategy for reads in Cassandra
1. One: reads the closest node holding the data.
2. Quorum: Returns a result from a Quorum of servers with the most recent timestamp for the data.
3. Local_Quorum: Returns the result from a Quorum of the servers with most recent timestamp for the data in the same data center as the coordinator node.
4. Each_Quorum: Returns the result from a Quorum of servers with the most recent timestamp in all data centers.
5. All: Returns the result from all replica nodes for the key.


-Read Repair

To ensure data consistency while reading, Cassandra performs a read repair. Suppose I read data that is stale on one of the nodes: Cassandra issues a repair, and the most recent data from the other nodes holding it is written back to the stale node, so the next time a request comes in it will return the latest data.


USING CONSISTENCY clause can be used to provide the consistency level on the operation to be executed.


Understanding the architecture of Cassandra

Whats is Cassandra ?

Some of us know about Cassandra and some of us may not know what Cassandra is. Well, Cassandra is a high-performance, fault-tolerant, extremely scalable and distributed database management system. It is not relational — we can describe it as a post-relational database solution. It can serve as a real-time data store for online/transactional applications and can also be used as a read-intensive database for business intelligence systems.

Overview of Cassandra Architecture.

Cassandra was a thoughtful innovation, designed with the understanding that failure is inevitable — there may be hardware failures or system crashes, and data is important. It is a peer-to-peer database management system; there is no concept of master or slave. Being a peer-to-peer architecture, you can read from or write to any Cassandra node in a cluster — all the nodes are treated the same. Data is partitioned across the nodes, and the system is made fault tolerant by replicating the data through configurable data replication.

Each node in cassandra communicates with each other through gossip protocol, which exchanges the information across the cluster in intervals. 

When data is written to Cassandra to assure the data durability it logs all the data to a commit log. The data in the commit log is then written to a in memory data structure call the memtable. Once the memtable is full the data is written to the disk called the SSTable.

The data is contained within a schema which is based on google big table , its a row oriented column structured design. It has the concept of keyspace which is similar to that of a relational database, The column family is the core object to manage data which is again very similar to relational database management system but the scheme is more flexible and dynamic in nature. A row in a column family can be indexed by its key and also other columns can be indexed as well.


Why would you use Cassandra ??

1. Scaling to gigabyte or petabyte.
2. By adding nodes linear performance can be achieved.
3. No single point of failure.
4. Data is distributed and replication of data is easy.
5. Capability to run in multiple datacenter and cloud.
6. Specially in Cassandra there is no need of a caching layer.
7. Tunable data consistency.
8. Schema design is flexible.
9. SQL like queries.
10. Support key languages and runs on commodity hardware or software.
11. Data compression with no performance penalty.

Friday, November 29, 2013

Oozie: The Workflow engine

Oozie is a server based Workflow Engine specialized in running workflow jobs with actions that run Hadoop Map/Reduce and Pig jobs

Oozie have three levels of meaning:

A server based workflow engine , a server based Coordinator Engine and a server based Bundle Engine .Oozie is a Java Web-Application that runs in a Java servlet-container

Oozie can store and run different type of hadoop jobs(mapreduce,hive,pig,and so on),can run workflow jobs based on time and data triggers,also can manage  batch   coordinator applications.

Oozie has been designed to scale, and it can manage the timely execution of thousands of workflow in a Hadoop cluster, each composed of possibly dozens of constituent jobs. Oozie workflow is a collection of actions (i.e. Hadoop Map/Reduce jobs, Pig jobs) arranged in a control dependency DAG (Direct Acyclic Graph), specifying a sequence of actions execution. This graph is specified in hPDL (a XML Process Definition Language).


hPDL is a fairly compact language, using a limited amount of flow control and action nodes. Control nodes define the flow of execution and include beginning and end of a workflow (start, end and fail nodes) and mechanisms to control the workflow execution path ( decision, fork and join nodes). Action nodes are the mechanism by which a workflow triggers the execution of a computation/processing task. Oozie provides support for the following types of actions: Hadoop map-reduce, Hadoop file system, Pig, Java and Oozie sub-workflow (SSH action is removed as of Oozie schema 0.2).

All computation/processing tasks triggered by an action node are remote to Oozie - they are executed by Hadoop Map/Reduce framework. This approach allows Oozie to leverage existing Hadoop machinery for load balancing, fail over, etc. The majority of these tasks are executed asynchronously (the exception is the file system action that is handled synchronously). This means that for most types of computation/processing tasks triggered by workflow action, the workflow job has to wait until the computation/processing task completes before transitioning to the following node in the workflow. Oozie can detect completion of computation/processing tasks by two different means, callbacks and polling. When a computation/processing tasks is started by Oozie, Oozie provides a unique callback URL to the task, the task should invoke the given URL to notify its completion. For cases that the task failed to invoke the callback URL for any reason (i.e. a transient network failure) or when the type of task cannot invoke the callback URL upon completion, Oozie has a mechanism to poll computation/processing tasks for completion.

Oozie workflows can be parameterized (using variables like ${inputDir} within the workflow definition). When submitting a workflow job, values for the parameters must be provided. If properly parameterized (i.e. using different output directories), several identical workflow jobs can run concurrently.

Some of the workflows are invoked on demand, but the majority of times it is necessary to run them based on regular time intervals and/or data availability and/or external events. The Oozie Coordinator system allows the user to define workflow execution schedules based on these parameters. Oozie coordinator allows to model workflow execution triggers in the form of the predicates, which can reference to data, time and/or external events. The workflow job is started after the predicate is satisfied.

It is also often necessary to connect workflow jobs that run regularly, but at different time intervals. The outputs of multiple subsequent runs of a workflow become the input to the next workflow. Chaining these workflows together is referred to as a data application pipeline. The Oozie Coordinator supports the creation of such data application pipelines.



Installing Oozie

Step-1: Prerequisites

You can follow the instructions provided by the official Oozie website to match the right versions of the Hadoop stack software. In this tutorial we are using Oozie version 3.0.2, which is available on GitHub. Its system requirements are as follows:

-Unix (tested in Linux and Mac OS X)  .We used  Ubuntu lucid– Server Version in .
-Java 1.6+
-Hadoop
-Apache Hadoop (tested with 0.20.2)
-Yahoo! Hadoop (tested with 0.20.104.2)
-ExtJS library (optional, to enable Oozie webconsole)
-ExtJS 2.2


Step-2: Server Installation

-Download or build an Oozie binary distribution https://github.com/yahoo/oozie/downloads
-Download a Hadoop binary distribution  http://www.us.apache.org/dist/hadoop/common/hadoop-0.20.2/
-Download ExtJS library (it must be version 2.2)  http://extjs.com/deploy/ext-2.2.zip
-Expand  two packages –oozie and hadoop distribution tar.gz as the oozie Unix user which recommended   by office document  in  server installation .Commands as shown below:

->oozie@dm4:~$ tar zxvf oozie-3.0.2-distro.tar.gz -C {oozie home}
->oozie@dm4:~$ tar zxvf hadoop-0.20.2.tar.gz -C {hadoop home}

-Build the oozie.war. Oozie runs on Hadoop, but its distribution is bundled without the Hadoop jar files and without the ExtJS library (because they are under different licenses). We have to run the Oozie setup shell script to pack in the required Hadoop jar files and the optional ExtJS library, so as to enable the Oozie web console. Oozie server scripts run only under the Unix user that owns the Oozie installation directory; if necessary, use sudo -u OOZIE_USER when invoking the scripts. Commands as shown below:
->$ bin/oozie-setup.sh -hadoop 0.20.2 ${HADOOP_HOME} -extjs /tmp/ext-2.2.zip

-Start up oozie and edit oozie configuration.To start Oozie as a daemon process run:
->$ bin/oozie-start.sh

-Using the Oozie command line tool check the status of Oozie:
->$ bin/oozie admin -oozie http://localhost:11000/oozie -status

Using a browser go to the Oozie web console , Oozie status should be NORMAL .If the status is HTTP 404 Not Found,you can edit the configuration file to fix it.Open conf/oozie-default.xml with vim,copy the property “oozie.services”  into oozie-site.xml. In oozie-site.xml’s  ”oozie.services”  property,one of the service name is “KerberosHadoopAccessorService” .Remove only ”Kerberos” which will make it “HadoopAccessorService”. And then restart oozie.

S.O.L.I.D: Class Design Principles in Java

Understanding S.O.L.I.D Class Design Principles 

Classes are the building blocks of your Java application. If these blocks are not strong, your building (i.e. the application) is going to face a tough time in the future. This essentially means that not-so-well-written classes can lead to very difficult situations when the application scope grows or the application faces design issues either in production or in maintenance.

On the other hand, set of well designed and written classes can speed up the coding process by leaps and bounds, while reducing the number of bugs in comparison.

In this post, I will list down 5 most recommended design principles, you should keep in mind, while writing your classes. These design principles are called SOLID, in short. They also form the best practices to be followed for designing your application classes.


Single Responsibility Principle

The Single Responsibility Principle (SRP) states that there should never be more than one reason for a class to change. This means that you should design your classes so that each has a single purpose. This does not mean that each class should have only one method but that all of the members in the class are related to the class's primary function. Where a class has multiple responsibilities, these should be separated into new classes.

When a class has multiple responsibilities, the likelihood that it will need to be changed increases. Each time a class is modified the risk of introducing bugs grows. By concentrating on a single responsibility, this risk is limited.


Open / Closed Principle

The Open / Closed Principle (OCP) specifies that software entities (classes, modules, functions, etc.) should be open for extension but closed for modification. The "closed" part of the rule states that once a module has been developed and tested, the code should only be adjusted to correct bugs. The "open" part says that you should be able to extend existing code in order to introduce new functionality.
As with the SRP, this principle reduces the risk of new errors being introduced by limiting changes to existing code.


Liskov Substitution Principle (LSP)

The Liskov Substitution Principle (LSP) states that "functions that use pointers or references to base classes must be able to use objects of derived classes without knowing it". When working with languages such as C#, this equates to "code that uses a base class must be able to substitute a subclass without knowing it". The principle is named after Barbara Liskov.
If you create a class with a dependency of a given type, you should be able to provide an object of that type or any of its subclasses without introducing unexpected results and without the dependent class knowing the actual type of the provided dependency. If the type of the dependency must be checked so that behaviour can be modified according to type, or if subtypes generated unexpected rules or side effects, the code may become more complex, rigid and fragile.


Interface Segregation Principle (ISP)

The Interface Segregation Principle (ISP) specifies that clients should not be forced to depend upon interfaces that they do not use. This rule means that when one class depends upon another, the number of members in the interface that is visible to the dependent class should be minimized.

Often when you create a class with a large number of methods and properties, the class is used by other types that only require access to one or two members. The classes are more tightly coupled as the number of members they are aware of grows. When you follow the ISP, large classes implement multiple smaller interfaces that group functions according to their usage. The dependents are linked to these for looser coupling, increasing robustness, flexibility and the possibility of reuse.


Dependency Inversion Principle (DIP)

The Dependency Inversion Principle (DIP) is the last of the five rules. The DIP makes two statements. The first is that high level modules should not depend upon low level modules. Both should depend upon abstractions. The second part of the rule is that abstractions should not depend upon details. Details should depend upon abstractions.

The DIP primarily relates to the concept of layering within applications, where lower level modules deal with very detailed functions and higher level modules use lower level classes to achieve larger tasks. The principle specifies that where dependencies exist between classes, they should be defined using abstractions, such as interfaces, rather than by referencing classes directly. This reduces fragility caused by changes in low level modules introducing bugs in the higher layers. The DIP is often met with the use of dependency injection.

Thread & Runnable In Java


Thread 

Thread in Java is an independent path of execution which is used to run two task in parallel. When two Threads run in parallel that is called multi-threading in Java. Java is multi-threaded from start and excellent support of Thread at language level e.g. java.lang.Thread class, synchronized keyword, volatile and final keyword makes writing concurrent programs easier in Java than any other programming language e.g. C++. Being multi-threaded is also a reason of Java's popularity and being number one programming language. On the other hand if your program divides a task between two threads it also brings lot of programming challenges and issues related to synchronization, deadlock, thread-safety and race conditions. In short answer of question What is Thread in Java can be given like "Thread is a class in Java but also a way to execute something in parallel independently in Java". Thread in Java requires a task which is executed by this thread independently and that task can be either Runnable or Callable.

Runnable

Runnable represent a task in Java which is executed by Thread. java.lang.Runnable is an interface and defines only one method called run(). When a Thread is started in Java by using Thread.start() method it calls run() method of Runnable task which was passed to Thread during creation. Code written inside run() method is executed by this newly created thread. Since start() method internally calls run() method its been a doubt among Java programmers that why not directly call the run() method. This is also asked as what is difference between start() and run() method in Java. Well when you call Runnable interface run() method directly , no new Thread will be created and task defined inside run() method is executed by calling thread.  There is another interface added in Java 1. 5 called Callable which can also be used in place of Runnable interface in Java. Callable provides additional functionality over Runnable in terms of returning result of computation. Since return type of run() method is void it can not return anything which is sometime necessary. On the other hand Callable interface defines call() method which has return type as Future which can be used to return result of computation from Thread in Java.

Example 

package sachi.test.threads;

public class Program{
  public static void main (String[] args) {
    Runner r = new Runner();
    Thread t1 = new Thread(r, "Thread A");
    Thread t2 = new Thread(r, "Thread B");
    Thread s1 = new Strider("Thread C");
    Thread s2 = new Strider("Thread D");
    t1.start();
    t2.start();
    s1.start();
    s2.start();
  }
}
class Runner implements Runnable {
  // Counter shared by every thread running this same Runner instance; it is
  // unsynchronized, so interleaved (and possibly lost) increments can occur.
  private int counter;

  /**
   * Prints the current thread's name and the counter twice, sleeping one
   * second between prints.
   */
  @Override
  public void run() {
    try {
      for (int i = 0; i != 2; i++) {
        System.out.println(Thread.currentThread().getName() + ": "
              + counter++);
        Thread.sleep(1000);
      }
    } catch (InterruptedException e) {
      // Fixed: restore the interrupt flag instead of just printing the stack
      // trace, so callers can still observe that the thread was interrupted.
      Thread.currentThread().interrupt();
    }
  }
}

class Strider extends Thread {
  // Per-instance counter: unlike a shared Runner, each Strider thread owns
  // its own counter and counts from 0 independently.
  private int counter;

  Strider(String name) {
    super(name);
  }

  /**
   * Prints this thread's name and the counter twice, sleeping one second
   * between prints.
   */
  @Override
  public void run() {
    try {
      for (int i = 0; i != 2; i++) {
        System.out.println(Thread.currentThread().getName() + ": "
            + counter++);
        Thread.sleep(1000);
      }
    } catch (InterruptedException e) {
      // Fixed: restore the interrupt flag instead of just printing the stack
      // trace, so the interruption is not silently swallowed.
      Thread.currentThread().interrupt();
    }
  }
}

Output:

Thread B: 1
Thread D: 0
Thread C: 0
Thread A: 0
Thread D: 1
Thread A: 3
Thread C: 1
Thread B: 2


Difference between Threads & Runnable.

1. Implementing Runnable is the preferred way to do it. Here, you’re not really specializing or modifying the thread’s behavior. You’re just giving the thread something to run. That means composition is the better way to go.

2. Java only supports single inheritance, so you can only extend one class.

3. Instantiating an interface gives a cleaner separation between your code and the implementation of threads.

4. Implementing Runnable makes your class more flexible. If you extend thread then the action you’re doing is always going to be in a thread. However, if you extend Runnable it doesn’t have to be. You can run it in a thread, or pass it to some kind of executor service, or just pass it around as a task within a single threaded application.

5. By extending Thread, each of your threads has a unique object associated with it, whereas implementing Runnable, many threads can share the same runnable instance.