SAS® Examples

Reference numbers for some of the following examples refer to problems in "The Little SAS Book" by Lora D. Delwiche and Susan J. Slaughter.

The examples below are set up so you can highlight and copy the SAS code directly into the SAS program editor or enhanced editor. Many of the examples use data stored on the g:\Stats_Data\ST553 drive on the computer network in the Milne labs and won't work on other networks without modification. To modify, simply save copies of the datasets onto a diskette (a:\ drive) and change the path from "g:\Stats_Data\ST553" to "a:\" .


Creating SAS datasets

  1. Read a SAS dataset
  2. Create a SAS dataset from raw data
    1. List input
    2. Column input
    3. Formatted input
    4. Mixed input
  3. Include data in program
  4. Create permanent SAS dataset
Working with SAS datasets
  1. Assignment statements
  2. Functions
  3. IF-THEN-ELSE, SELECT and LABEL statements
  4. Subsetting data
  5. DO-WHILE loop
  6. DO-UNTIL loop
Procedures
  1. PROC PRINT and PROC SORT
  2. PROC MEANS, PROC SORT and PROC PRINT
  3. PROC FREQ
  4. PROC UNIVARIATE
  5. PROC REG
  6. PROC GLM
  7. PROC GPLOT


Creating SAS Datasets

1. Read in an existing SAS dataset (SET)

* Assign the libref IN to the folder holding the permanent SAS datasets;
libname in 'g:\stats_data\st553\sasdata';

* Copy the permanent dataset in.classscores into the temporary dataset scores;
data scores;
  set in.classscores;
run;

* List every observation of the copy;
proc print data=scores;
run;



2. Create a SAS dataset from raw data (INFILE, INPUT)

a. List input or space delimited
* Build the temporary dataset toads from the raw file ToadJump.dat;
* List input is used: one character name followed by four numeric values;

data toads;
  infile "g:\stats_data\st553\ascii_data\ToadJump.dat";
  input ToadName $
        Weight
        Jump1-Jump3;
run;

* Display the dataset so the read can be checked by eye;

title "SAS Data Set Toads";
proc print data=toads;
run;
 
**********************************************************************;

* Read an external file through a fileref instead of a full path;
* The fileref RAW names a directory and the INFILE statement picks one member of it;

filename raw "g:\stats_data\st553\ascii_data";

data uspresidents2;
  infile raw(President.dat);
  input President $
        Party $
        Number;
run;

proc print data=uspresidents2;
run;

b. Column input
* Build the dataset sales from Onions.dat;
* Column input is used: every variable is read from a fixed range of columns;

data sales;
  infile "g:\stats_data\st553\ascii_data\Onions.dat";
  input VisitingTeam    $  1-20
        ConcessionSales   21-24
        BleacherSales     25-28
        OurHits           29-31
        TheirHits         32-34
        OurRuns           35-37
        TheirRuns         38-40;
run;

* Display the dataset so the read can be checked by eye;

title "SAS data set sales";
proc print data=sales;
run;

c. Formatted input
* Build the dataset contest from Pumpkin.dat;
* Formatted input is used: each variable is read with an explicit informat;
* The +1 pointer controls skip one column and the five scores share the 4.1 informat;

data contest;
  infile "g:\stats_data\st553\ascii_data\Pumpkin.dat";
  input Name $16.
        Age  3.
        +1 Type $1.
        +1 Date mmddyy10.
        (Score1-Score5) (4.1);
run;

* Display the dataset so the read can be checked by eye;

title "Pumpkin Carving Contest";
proc print data=contest;
run;

d. Mixed input
* Build the dataset nationalparks from Park.dat;
* Three input styles are mixed in a single INPUT statement;
*   ParkName       column input, columns 1 to 22;
*   State and Year list input;
*   Acreage        formatted input with COMMA9. starting at column 40;

data nationalparks;
  infile "g:\stats_data\st553\ascii_data\Park.dat";
  input ParkName $ 1-22
        State $
        Year
        @40 Acreage comma9.;
run;

* First listing: Acreage is printed with no format attached;
title "Selected National Parks";
proc print data=nationalparks;
run;



* Second listing: the same data with Acreage printed using COMMA9.;
title "Selected National Parks";
proc print data=nationalparks;
  format acreage comma9.;
run;



3. Include data in a SAS program (DATA step, INPUT, DATALINES/CARDS)

 
/* Read candy-sale records that are typed directly into the program.
   The fields are separated by blanks (list input).  DateReturned is read
   with the colon modifier (:mmddyy10.) so that the informat is applied to
   the blank-delimited field rather than to a fixed 10 columns.  Plain
   "mmddyy10." only worked here because every date happens to be 9 characters
   followed by a blank; a shorter date such as 3/1/2000 would have pulled
   part of CandyType into the date field. */
DATA sales;
  INPUT name $ Class DateReturned :mmddyy10. CandyType $ Quantity;
CARDS;
Adriana 21 3/21/2000 MP 7
Nathan 14 3.21.2000 CD 19
Matthew 14 3.21.2000 CD 14
Claire 14 3.22.2000 CD 11
Caitlin 21 3.24.2000 CD 9
Ian 21 3.24.2000 MP 18
Chris 14 3.25.2000 MP 6
Anthony 21 3.25.2000 MP 13
Stephen 14 3.25.2000 CD 10
Erika 21 3.25.2000 MP 17
;

/* DateReturned prints as a SAS date value (a day count) because no
   format is attached to it. */
PROC PRINT DATA=sales;
RUN;

*****************************************************************************;
 
/* Read flower-sale records typed directly into the program.
   CustomerID is character, SaleDate is read with the MMDDYY10. informat,
   and the three flower counts are numeric list-input fields. */
data flowers;
  input CustomerID $
        SaleDate mmddyy10.
        Petunia
        SnapDragon
        Marigold;
cards;
756-01 05.04.2001 120  80 110
834-01 05.12.2001  90 160  60
901-02 05.18.2001  50 100  75
834-01 06.01.2001  80  60 100
756-01 06.11.2001 100 160  75
901-02 06.19.2001  60  60  60
756-01 06.25.2001  85 110 100
;

proc print data=flowers;
run;

*****************************************************************************;


/* Read several observations from each data line.
   The trailing @@ holds the current line across iterations of the DATA
   step, so each coffee/window pair becomes one observation.  A lone
   period in the data is read as a missing value. */
data coffee;
  input coffee $
        window $ @@;
datalines;
esp w cap d cap w kon w ice w kon d esp d kon w ice d esp d 
cap w esp d cap d Kon d .   d kon w esp d cap w ice w kon w
kon w kon w ice d esp d kon w esp d esp w kon w cap w kon w
;

proc print data=coffee;
run;
*****************************************************************************;

/* Read a single line of exam scores, one observation per score.
   The trailing @@ keeps the line held until every value has been read. */
data ClassScores;
  input score @@;
cards;
56 78 84 73 90 44 76 87 92 75 85 67 90 84 74 64 73 78 69 56 87 73 100 54 81 78 69 64 73 65
;

proc print data=ClassScores;
run;


4. Creating permanent SAS datasets


* Store the contents of Mag.dat as a permanent SAS dataset;
* The two-level name plants.magnolia writes the dataset into the PLANTS library;

libname plants "z:\st553\sasdata";

data plants.magnolia;
  infile "g:\stats_data\st553\ascii_data\Mag.dat";
  input ScientificName $  1-14
        CommonName     $ 16-32
        MaximumHeight
        AgeBloom
        Type  $
        Color $;
run;


title "Magnolias";
proc print data=plants.magnolia;
run;

*****************************************************************************;

* Store the contents of Garden.dat as the permanent dataset in.homegarden;

libname in "z:\st553\sasdata";

data in.homegarden;
  infile "g:\stats_data\st553\ascii_data\Garden.dat";
  input Name $ 1-7
        Tomato
        Zucchini
        Peas
        Grapes;
run;

title "Home Gardening Survey";
proc print data=in.homegarden;
run;





Working with SAS Datasets

1. Assignments


* Derive new variables from in.homegarden with assignment statements;

libname in "g:\stats_data\st553\sasdata";

data homegarden2;
  set in.homegarden;

  /* Constants: the same value is stored on every observation */
  Zone = 14;
  Type = "home";

  /* Rescale Zucchini in place before it is used in the total */
  Zucchini = Zucchini*10;

  /* The + operator returns missing if any of the four crops is missing */
  Total  = Tomato + Zucchini + Peas + Grapes;

  /* Tomato share of the total, as a percentage */
  PerTom = (Tomato / Total) * 100;
run;


title "Home Gardening Survey";
proc print data=homegarden2;
run;


2. Functions

* Apply SAS functions to the variables of in.contest;

libname in "g:\stats_data\st553\sasdata";

* Listing of the data before any change;
title "Pumpkin Carving Contest--original";
proc print data=in.contest;
run;

data contest;
  set in.contest;

  /* Two equivalent ways of averaging the five scores:
     an explicit argument list and an OF variable-range list */
  AvgScore  = mean(Score1, score2, score3, score4, score5);
  AvgScore2 = mean(of Score1-score5);

  /* Day of the month taken from the SAS date value */
  DayEntered = day(Date);

  /* Convert Type to upper case in place */
  Type = upcase(Type);

  /* Two equivalent ways of writing the date 4 July 2000:
     the MDY function and a date literal */
  JudgeDate  = mdy(07, 04, 2000);
  JudgeDate2 = '04jul2000'd;
run;

* Listing of the data after the changes;
title "Pumpkin Carving Contest--modified";
proc print data=contest;
run;

3. IF-THEN-ELSE, SELECT, and LABEL statements

/* Assign a traditional letter grade (IF-THEN-ELSE chain) and a plus/minus
   grade (SELECT group) to each exam score, label the variables, then list
   the students by section. */
data grades;
  input id section score @@;

  /* Declare the character lengths explicitly.  Without this, plusminus
     would take its length ($1) from the first assignment, plusminus = "A",
     and values such as "A-" or "B+" would be truncated to one character.
     The statement follows INPUT so that id, section and score remain the
     first variables in the dataset. */
  length grade $1 plusminus $2;

  /* Traditional grade.  Every test is on score; the original "D" branch
     tested the variable grade by mistake, so scores from 60 to 69 fell
     through to "F".  A missing score leaves grade blank. */
  if score ge 90 then grade = "A";
    else if score ge 80 then grade = "B";
           else if score ge 70 then grade = "C";
                  else if score ge 60 then grade = "D";
                         else if score ne . then grade = "F";

  /* Plus/minus grade.  WHEN clauses are evaluated top to bottom and the
     first true one wins, so the cut-offs must stay in descending order.
     The empty OTHERWISE leaves plusminus blank for a missing score. */
  select;
    when (score ge 93) plusminus = "A";
    when (score ge 90) plusminus = "A-";
    when (score ge 88) plusminus = "B+";
    when (score ge 83) plusminus = "B";
    when (score ge 80) plusminus = "B-";
    when (score ge 78) plusminus = "C+";
    when (score ge 73) plusminus = "C";
    when (score ge 70) plusminus = "C-";
    when (score ge 68) plusminus = "D+";
    when (score ge 63) plusminus = "D";
    when (score ge 60) plusminus = "D-";
    when (score ne . ) plusminus = "F";
    otherwise;
  end;
  label id        = "Student ID number"
        section   = "Class section number"
        score     = "Exam score"
        grade     = "Traditional grade"
        plusminus = "Plus/Minus grade"
        ;
datalines;
811 1 85 138 1 95 137 1 75 642 1 94 134 1 88 466 1 84 258 1 36 733 1 86 844 1 69
131 2 84 336 2 76 541 2 79 951 2 79 348 2 94 846 2 64 187 2 96 976 2 68 199 2 46
879 3 54 796 3 97 872 3 94 647 3 99 994 3 46 884 3 86 946 3 76 465 3 79 944 3 84
;

/* BY-group processing in PROC PRINT needs the data sorted by section */
proc sort data=grades;
  by section plusminus;

proc print data=grades;
  by section;
  id section;
run;

****************************************************************;

** Build 0/1 indicator (dummy) variables from one grouping variable **;

data classdata;
  input section score @@;

  /* A comparison evaluates to 1 when true and 0 when false, so each
     indicator is 1 only for its own section */
  section1 = (section = 1);
  section2 = (section = 2);
  section3 = (section = 3);
  section4 = (section = 4);

  /* Any other section value is reported in the log */
  if section not in (1, 2, 3, 4) then put section=;
datalines;
1 58 1 36 1 87 1 78 1 89 1 67 
2 76 2 94 2 64 2 84 2 96 2 47 
3 75 3 74 3 86 3 91 3 76 3 84
4 81 4 67 4 97 4 31 4 89 4 67
;

proc print data=classdata;
run;

4. Subsetting data

/* Keep section 1 only: every observation that is not section 1 is deleted */
data grades01;
  set grades;
  if section ne 1 then delete;
run;

proc print data=grades01;
run;

********************;

/* Keep sections 1 and 2 */
data grades0102;
  set grades;
  if section = 1 or section = 2;
run;

proc print data=grades0102;
run;

*********************;

/* Drop section 3 and keep everything else.
   This step reuses the name grades0102, so it replaces the dataset
   created by the previous step. */
data grades0102;
  set grades;
  if section ne 3;
run;

proc print data=grades0102;
run;

*********************;

/* WHERE filters observations as they are read from grades:
   section 1 students whose traditional grade is A or B */
data grades1AB;
  set grades;
  where section = 1 and grade in ("A", "B");
run;

proc print data=grades1AB;
run;

5. DO-WHILE loop

/* Sample data: one observation per class, with the enrolled students held
   in a single comma-separated string.  Each OUTPUT writes one observation. */
data classes;
  length ClassList $60;

  ClassNumber = "ST100";
  ClassList   = "Jim Johnson, Sally Ryan";
  output;

  ClassNumber = "ST101";
  ClassList   = "Bob Smith, Ally Carson, Doug Anderson";
  output;

  ClassNumber = "ST102";
  ClassList   = "Pete Billingston";
  output;

  ClassNumber = "ST103";
  ClassList   = "John Carpenter, Michelle Dante";
  output;

  ClassNumber = "ST104";
  ClassList   = "Allison Trenton, Melissa Fredrickson, Jon Larson";
  output;

  ClassNumber = "ST105";
  ClassList   = "Jill North";
  output;
run;

title "Original Data";
proc print data=classes;
run;

/* Split each list into one observation per student.
   DO WHILE tests its condition at the top of the loop, so a list with no
   comma skips the loop and is written by the final OUTPUT. */
data data1;
  set classes;
  remaining = ClassList;
  pos = index(remaining, ",");
  do while (pos gt 0);
    /* Text before the comma is the next student name */
    student = substr(remaining, 1, pos - 1);
    output;
    /* Continue after the comma and the blank that follows it */
    remaining = substr(remaining, pos + 2);
    pos = index(remaining, ",");
  end;
  /* Whatever is left is the last (or only) student */
  student = remaining;
  output;
  drop remaining pos ClassList;
run;

title "New Dataset";
proc print data=data1;
  by ClassNumber;
  id ClassNumber;
run;

6. DO-UNTIL loop

/* Sample data: one observation per class, with the enrolled students held
   in a single comma-separated string.  Each OUTPUT writes one observation. */
data classes;
  length ClassList $60;

  ClassNumber = "ST100";
  ClassList   = "Jim Johnson, Sally Ryan";
  output;

  ClassNumber = "ST101";
  ClassList   = "Bob Smith, Ally Carson, Doug Anderson";
  output;

  ClassNumber = "ST102";
  ClassList   = "Pete Billingston";
  output;

  ClassNumber = "ST103";
  ClassList   = "John Carpenter, Michelle Dante";
  output;

  ClassNumber = "ST104";
  ClassList   = "Allison Trenton, Melissa Fredrickson, Jon Larson";
  output;

  ClassNumber = "ST105";
  ClassList   = "Jill North";
  output;
run;

title "Original Data";
proc print data=classes;
run;

/* Split each list into one observation per student.
   DO UNTIL tests its condition at the bottom of the loop, so the body
   always runs at least once and the last student is written inside it. */
data data1;
  set classes;
  rest = ClassList;
  do until (pos = 0);
    pos = index(rest, ",");
    /* Text before the comma is the next name, or the whole remainder
       when no comma is left */
    if pos gt 0 then student = substr(rest, 1, pos - 1);
                else student = rest;
    output;
    /* Continue after the comma and the blank that follows it */
    rest = substr(rest, pos + 2);
  end;
  drop rest pos ClassList;
run;

title "New Dataset";
proc print data=data1;
  by ClassNumber;
  id ClassNumber;
run;




SAS Procedures

1. PROC PRINT and PROC SORT

libname in "g:\stats_data\st553\sasdata";

* Work on a temporary copy of in.sales;
data sales;
  set in.sales;
run;

* Plain listing of the copy;
proc print data=sales;
run;

********************************;

* BY-group processing requires the data to be sorted by the BY variable;
proc sort data=sales;
  by Class;
run;

* One listing per class, with profit summed for each BY group;
title "Candy Sales for Field Trip by Class";
proc print data=sales;
  var name datereturned candytype profit;
  by class;
  sum profit;
run;

********************************;

* Same report with class also named in an ID statement;
title "Candy Sales for Field Trip by Class -- with ID";
proc print data=sales;
  var name datereturned candytype profit;
  by class;
  id class;
  sum profit;
run;

2. PROC MEANS, PROC SORT and PROC PRINT

*** 4.9  PROC MEANS ***;

libname in 'g:\stats_data\st553\sasdata';

* Work on a temporary copy of in.flowers;
data sales;
  set in.flowers;
run;

* Sort by month so PROC MEANS can process one BY group per month;
proc sort data=sales;
  by month;
run;

* Default summary statistics for the three flower counts within each month;
title "Summary of Flower Sales by Month";
proc means data=sales;
  var petunia snapdragon marigold;
  by month;
run;


*** 4.10  OUTPUT from PROC MEANS ***;

* Sort by customer so PROC MEANS can process one BY group per customer;
proc sort data=sales;
  by customerid;

* Write the per-customer mean and sum of each flower count to the dataset totals;
* The names after MEAN= and SUM= follow the order of the VAR statement;
proc means data=sales;
  by customerid;
  var petunia snapdragon marigold;
  output out=totals mean = MeanPetunia MeanSnapDragon MeanMarigold
                    sum  = SumPetunia  SumSnapDragon  SumMarigold;

* Title typo corrected from Sum or to Sum of;
* The double-dash list selects the variables stored from meanpetunia through meanmarigold;
proc print data=totals;
title "Sum of flower data over Customer ID";
format meanpetunia--meanmarigold 3.;

run;

3. PROC FREQ

*** 4.11 proc freq ***;

libname in 'g:\stats_data\st553\sasdata';

* Work on a temporary copy of in.coffee;
data orders;
  set in.coffee;
run;

* One-way table for window and a two-way table of window by coffee;
proc freq data=orders;
  tables window
         window*coffee;
run;

****;

* The same two-way table with the MISSPRINT option and then with the MISSING option;
proc freq data=orders;
  tables window*coffee / missprint;
  tables window*coffee / missing;
run;

****;

* Rebuild orders with coffee converted to upper case and tabulate again;
data orders;
  set in.coffee;
  coffee = upcase(coffee);
run;

proc freq data=orders;
  tables window*coffee / missprint;
run;

4. PROC UNIVARIATE

*** 7.1 proc univariate ***;

libname in 'g:\stats_data\st553\sasdata';

* Work on a temporary copy of in.classscores;
data class;
  set in.classscores;
run;


* Default PROC UNIVARIATE output for score;
title "Class Scores";
proc univariate data=class;
  var score;
run;

*****;

* Same analysis with the PLOT option on the PROC statement;
title "Class Scores";
proc univariate data=class plot;
  var score;
run;

*****;

* Same analysis with HISTOGRAM, QQPLOT and PROBPLOT statements;
title "Class Scores";
proc univariate data=class;
  var score;
  histogram;
  qqplot;
  probplot;
run;

*****;

* Same analysis with the NORMAL option on the PROC statement;
title "Class Scores";
proc univariate data=class normal;
  var score;
run;

5. PROC REG

** Same classdata as the earlier SELECT example, rebuilt here so the step runs on its own **;
data classdata;
  input section score @@;

  /* Fill the four indicator variables through an array:
     element k is 1 when section equals k and 0 otherwise */
  array indicator{4} section1-section4;
  do k = 1 to 4;
    indicator{k} = (section = k);
  end;
  drop k;

  /* Any other section value is reported in the log */
  if section not in (1, 2, 3, 4) then put section=;
datalines;
1 58 1 36 1 87 1 78 1 89 1 67 
2 76 2 94 2 64 2 84 2 96 2 47 
3 75 3 74 3 86 3 91 3 76 3 84
4 81 4 67 4 97 4 31 4 89 4 67
;


/* Regress score on three of the four indicators; section4 is left out
   of the model */
proc reg data=classdata;
  model score = section1 section2 section3;
run;
quit;

6. PROC GLM

 ** uses classdata from above example (SELECT statement) **;
/* Rebuild classdata with one 0/1 indicator variable per section */
data classdata;
  input section score @@;

  /* Start every indicator at 0, then switch on the one that matches */
  section1 = 0;
  section2 = 0;
  section3 = 0;
  section4 = 0;
  if      section = 1 then section1 = 1;
  else if section = 2 then section2 = 1;
  else if section = 3 then section3 = 1;
  else if section = 4 then section4 = 1;
  else put section=;   /* any other value is reported in the log */
datalines;
1 58 1 36 1 87 1 78 1 89 1 67 
2 76 2 94 2 64 2 84 2 96 2 47 
3 75 3 74 3 86 3 91 3 76 3 84
4 81 4 67 4 97 4 31 4 89 4 67
;

/* Model 1: the hand-built indicators are the regressors and
   SOLUTION requests the parameter estimates */
proc glm data=classdata;
  model score = section1 section2 section3 / solution;
run;
quit;

/* Model 2: section is named in a CLASS statement, so the
   hand-built indicators are not used */
proc glm data=classdata;
  class section;
  model score = section / solution;
run;
quit;

7. PROC GPLOT

/* Generate 100 points: x runs from 1 to 100 and y comes from RANUNI.
   NOTE(review): the seed of -1 presumably makes RANUNI seed from the
   clock, so the values would differ between runs - confirm against the
   RANUNI documentation. */
data plotdata;
  do x = 1 to 100 by 1;
    y = ranuni(-1);
    output;
  end;
run;

proc print data=plotdata;
run;


/* Plot settings: small dots as the plotting symbol, a vertical axis from
   0 to 1 and a horizontal axis from 0 to 100, both without minor ticks */
symbol value=dot height=.5;
axis1 order=(0 to 1 by .1) minor=none;
axis2 order=(0 to 100 by 10) minor=none;

/* Scatter plot of y against x using the two axis definitions */
proc gplot data=plotdata;
  plot y * x / vaxis=axis1 haxis=axis2;
run;
quit;



Last modified: April 4, 2000